diff --git a/.clang-format b/.clang-format
new file mode 100644
index 000000000..06453dfbb
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+---
+BasedOnStyle:  Google
+DerivePointerAlignment: false
+ColumnLimit: 90
diff --git a/.github/workflows/report_ram_log.yml b/.github/workflows/report_ram_log.yml
index 3067e2e73..e63d34dd8 100644
--- a/.github/workflows/report_ram_log.yml
+++ b/.github/workflows/report_ram_log.yml
@@ -50,7 +50,7 @@ jobs:
       - name: Install OAP optimized Arrow
         run: |
           cd /tmp
-          git clone -b arrow-3.0.0-oap https://github.com/oap-project/arrow.git
+          git clone -b arrow-3.0.0-oap-1.1 https://github.com/oap-project/arrow.git
           cd arrow/java
           mvn clean install -B -P arrow-jni -am -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn -Darrow.cpp.build.dir=/tmp/arrow/cpp/build/release/ -DskipTests -Dcheckstyle.skip
       - name: Run Maven tests
diff --git a/.github/workflows/tpch.yml b/.github/workflows/tpch.yml
index b3a4bd19f..650563ff2 100644
--- a/.github/workflows/tpch.yml
+++ b/.github/workflows/tpch.yml
@@ -42,7 +42,7 @@ jobs:
         run: |
           cd /tmp
           git clone https://github.com/oap-project/arrow.git
-          cd arrow && git checkout arrow-3.0.0-oap && cd cpp
+          cd arrow && git checkout arrow-3.0.0-oap-1.1 && cd cpp
           mkdir build && cd build
           cmake .. -DARROW_JNI=ON -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_HDFS=ON -DARROW_FILESYSTEM=ON -DARROW_WITH_SNAPPY=ON -DARROW_JSON=ON -DARROW_DATASET=ON -DARROW_WITH_LZ4=ON -DARROW_JEMALLOC=OFF && make -j2
           sudo make install
diff --git a/.github/workflows/unittests.yml b/.github/workflows/unittests.yml
index 83020c709..54fc8f660 100644
--- a/.github/workflows/unittests.yml
+++ b/.github/workflows/unittests.yml
@@ -45,7 +45,7 @@ jobs:
         run: |
           cd /tmp
           git clone https://github.com/oap-project/arrow.git
-          cd arrow && git checkout arrow-3.0.0-oap && cd cpp
+          cd arrow && git checkout arrow-3.0.0-oap-1.1 && cd cpp
           mkdir build && cd build
           cmake .. -DARROW_JNI=ON -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_HDFS=ON -DARROW_FILESYSTEM=ON -DARROW_WITH_SNAPPY=ON -DARROW_JSON=ON -DARROW_DATASET=ON -DARROW_WITH_LZ4=ON -DGTEST_ROOT=/usr/src/gtest && make -j2
           sudo make install
@@ -59,3 +59,14 @@ jobs:
           cd src
           ctest -R
 
+  formatting-check:
+    name: Formatting Check
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Run clang-format style check for C/C++ programs.
+      uses: jidicula/clang-format-action@v3.2.0
+      with:
+        clang-format-version: '10'
+        check-path: 'native-sql-engine/cpp/src'
+        fallback-style: 'Google' # optional
diff --git a/README.md b/README.md
index 6e90b2659..20b0a6f55 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ With [Spark 27396](https://issues.apache.org/jira/browse/SPARK-27396) its possib
 
 ![Overview](./docs/image/dataset.png)
 
-A native parquet reader was developed to speed up the data loading. it's based on Apache Arrow Dataset. For details please check [Arrow Data Source](https://github.com/oap-project/arrow-data-source)
+A native parquet reader was developed to speed up the data loading. It is based on Apache Arrow Dataset. For details, please check [Arrow Data Source](https://github.com/oap-project/native-sql-engine/tree/master/arrow-data-source)
 
 ### Apache Arrow Compute/Gandiva based operators
 
@@ -101,7 +101,7 @@ orders.createOrReplaceTempView("orders")
 spark.sql("select * from orders where o_orderdate > date '1998-07-26'").show(20000, false)
 ```
 
-The result should showup on Spark console and you can check the DAG diagram with some Columnar Processing stage.
+The result should show up on the Spark console, and you can check the DAG diagram, which should contain some Columnar Processing stages. Native SQL Engine still lacks some features; please check out the [limitations](./docs/limitations.md).
 
 
 ## Performance data
diff --git a/arrow-data-source/docs/Installation.md b/arrow-data-source/docs/Installation.md
index 604829663..1b172ba50 100644
--- a/arrow-data-source/docs/Installation.md
+++ b/arrow-data-source/docs/Installation.md
@@ -11,19 +11,16 @@ yum install gmock
 
 ## Build Native SQL Engine
 
-``` shell
-git clone -b ${version} https://github.com/oap-project/native-sql-engine.git
-cd oap-native-sql
-cd cpp/
-mkdir build/
-cd build/
-cmake .. -DTESTS=ON
-make -j
-```
+cmake parameters:
+- BUILD_ARROW (default is ON): build Arrow from source.
+- STATIC_ARROW (default is OFF): when BUILD_ARROW is ON, choose whether to build a static or shared Arrow library. Please note that currently only building the shared Arrow library is supported.
+- ARROW_ROOT (default is /usr/local): when BUILD_ARROW is OFF, set the Arrow library path to link against an existing library in your environment.
+- BUILD_PROTOBUF (default is ON): build Protobuf from source.
 
 ``` shell
-cd ../../core/
-mvn clean package -DskipTests
+git clone -b ${version} https://github.com/oap-project/native-sql-engine.git
+cd native-sql-engine
+mvn clean package -am -DskipTests -Dcpp_tests=OFF -Dbuild_arrow=ON -Dstatic_arrow=OFF -Darrow_root=/usr/local -Dbuild_protobuf=ON
 ```
 
 ### Additonal Notes
diff --git a/docs/ApacheArrowInstallation.md b/docs/ApacheArrowInstallation.md
new file mode 100644
index 000000000..4e0647f74
--- /dev/null
+++ b/docs/ApacheArrowInstallation.md
@@ -0,0 +1,70 @@
+# llvm-7.0: 
+Arrow Gandiva depends on LLVM, and the current version strictly depends on LLVM 7.0. If you install any version other than 7.0, the build will fail.
+``` shell
+wget http://releases.llvm.org/7.0.1/llvm-7.0.1.src.tar.xz
+tar xf llvm-7.0.1.src.tar.xz
+cd llvm-7.0.1.src/
+cd tools
+wget http://releases.llvm.org/7.0.1/cfe-7.0.1.src.tar.xz
+tar xf cfe-7.0.1.src.tar.xz
+mv cfe-7.0.1.src clang
+cd ..
+mkdir build
+cd build
+cmake .. -DCMAKE_BUILD_TYPE=Release
+cmake --build . -j
+cmake --build . --target install
+# check if clang has also been compiled, if no
+cd tools/clang
+mkdir build
+cd build
+cmake ..
+make -j
+make install
+```
+
+# cmake: 
+Arrow downloads packages during compilation, which requires SSL support in cmake. If your cmake lacks SSL support, you can build cmake from source as shown below; this step is optional.
+``` shell
+wget https://github.com/Kitware/CMake/releases/download/v3.15.0-rc4/cmake-3.15.0-rc4.tar.gz
+tar xf cmake-3.15.0-rc4.tar.gz
+cd cmake-3.15.0-rc4/
+./bootstrap --system-curl --parallel=64 #parallel num depends on your server core number
+make -j
+make install
+cmake --version
+cmake version 3.15.0-rc4
+```
+
+# Apache Arrow
+``` shell
+git clone https://github.com/Intel-bigdata/arrow.git
+cd arrow && git checkout branch-0.17.0-oap-1.0
+mkdir -p cpp/release-build
+cd cpp/release-build
+cmake -DARROW_DEPENDENCY_SOURCE=BUNDLED -DARROW_GANDIVA_JAVA=ON -DARROW_GANDIVA=ON -DARROW_PARQUET=ON -DARROW_HDFS=ON -DARROW_BOOST_USE_SHARED=ON -DARROW_JNI=ON -DARROW_DATASET=ON -DARROW_WITH_PROTOBUF=ON -DARROW_WITH_SNAPPY=ON -DARROW_WITH_LZ4=ON -DARROW_FILESYSTEM=ON -DARROW_JSON=ON ..
+make -j
+make install
+
+# build java
+cd ../../java
+# change property 'arrow.cpp.build.dir' to the relative path of cpp build dir in gandiva/pom.xml
+mvn clean install -P arrow-jni -am -Darrow.cpp.build.dir=../cpp/release-build/release/ -DskipTests 
+# if you are behind a proxy, please also add the socks proxy settings
+mvn clean install -P arrow-jni -am -Darrow.cpp.build.dir=../cpp/release-build/release/ -DskipTests -DsocksProxyHost=${proxyHost} -DsocksProxyPort=1080 
+```
+
+run test
+``` shell
+mvn test -pl adapter/parquet -P arrow-jni
+mvn test -pl gandiva -P arrow-jni
+```
+
+# Copy binary files to oap-native-sql resources directory
+Because the oap-native-sql plugin builds a stand-alone jar file that bundles the Arrow dependency, if you choose to build Arrow by yourself, you have to copy the files below to replace the original ones.
+You can find those files in the Apache Arrow installation directory or release directory. The example below assumes Apache Arrow has been installed in /usr/local/lib64.
+``` shell
+cp /usr/local/lib64/libarrow.so.17 $native-sql-engine-dir/cpp/src/resources
+cp /usr/local/lib64/libgandiva.so.17 $native-sql-engine-dir/cpp/src/resources
+cp /usr/local/lib64/libparquet.so.17 $native-sql-engine-dir/cpp/src/resources
+``` 
diff --git a/docs/Configuration.md b/docs/Configuration.md
new file mode 100644
index 000000000..b20b46f0e
--- /dev/null
+++ b/docs/Configuration.md
@@ -0,0 +1,29 @@
+# Spark Configurations for Native SQL Engine
+
+Add below configuration to spark-defaults.conf
+
+```
+##### Columnar Process Configuration
+
+spark.sql.sources.useV1SourceList avro
+spark.sql.join.preferSortMergeJoin false
+spark.sql.extensions com.intel.oap.ColumnarPlugin
+spark.shuffle.manager org.apache.spark.shuffle.sort.ColumnarShuffleManager
+
+# note native sql engine depends on arrow data source
+spark.driver.extraClassPath $HOME/miniconda2/envs/oapenv/oap_jars/spark-columnar-core-<version>-jar-with-dependencies.jar:$HOME/miniconda2/envs/oapenv/oap_jars/spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar
+spark.executor.extraClassPath $HOME/miniconda2/envs/oapenv/oap_jars/spark-columnar-core-<version>-jar-with-dependencies.jar:$HOME/miniconda2/envs/oapenv/oap_jars/spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar
+
+spark.executorEnv.LIBARROW_DIR      $HOME/miniconda2/envs/oapenv
+spark.executorEnv.CC                $HOME/miniconda2/envs/oapenv/bin/gcc
+######
+```
+
+Before you start spark, you must use below command to add some environment variables.
+
+```
+export CC=$HOME/miniconda2/envs/oapenv/bin/gcc
+export LIBARROW_DIR=$HOME/miniconda2/envs/oapenv/
+```
+
+For details about arrow-data-source.jar, please refer to [Unified Arrow Data Source](https://oap-project.github.io/arrow-data-source/).
diff --git a/docs/Installation.md b/docs/Installation.md
new file mode 100644
index 000000000..604829663
--- /dev/null
+++ b/docs/Installation.md
@@ -0,0 +1,30 @@
+# Spark Native SQL Engine Installation
+
+For detailed testing scripts, please refer to [solution guide](https://github.com/Intel-bigdata/Solution_navigator/tree/master/nativesql)
+
+## Install Googletest and Googlemock
+
+``` shell
+yum install gtest-devel
+yum install gmock
+```
+
+## Build Native SQL Engine
+
+``` shell
+git clone -b ${version} https://github.com/oap-project/native-sql-engine.git
+cd oap-native-sql
+cd cpp/
+mkdir build/
+cd build/
+cmake .. -DTESTS=ON
+make -j
+```
+
+``` shell
+cd ../../core/
+mvn clean package -DskipTests
+```
+
+### Additional Notes
+[Notes for Installation Issues](./InstallationNotes.md)
diff --git a/docs/InstallationNotes.md b/docs/InstallationNotes.md
new file mode 100644
index 000000000..cf7120be9
--- /dev/null
+++ b/docs/InstallationNotes.md
@@ -0,0 +1,47 @@
+### Notes for Installation Issues
+* Before the installation, if you have installed another version of oap-native-sql, remove all installed libraries and include files from the system path: libarrow* libgandiva* libspark-columnar-jni*
+
+* libgandiva_jni.so was not found inside JAR
+
+change property 'arrow.cpp.build.dir' to $ARROW_DIR/cpp/release-build/release/ in gandiva/pom.xml. If you do not want to change the contents of pom.xml, specify it like this:
+
+```
+mvn clean install -P arrow-jni -am -Darrow.cpp.build.dir=/root/git/t/arrow/cpp/release-build/release/ -DskipTests -Dcheckstyle.skip
+```
+
+* No rule to make target '../src/protobuf_ep', needed by `src/proto/Exprs.pb.cc'
+
+remove the existing libprotobuf installation, then the script for find_package() will be able to download protobuf.
+
+* can't find the libprotobuf.so.13 in the shared lib
+
+copy the libprotobuf.so.13 from $OAP_DIR/oap-native-sql/cpp/src/resources to /usr/lib64/
+
+* unable to load libhdfs: libgsasl.so.7: cannot open shared object file
+
+libgsasl is missing, run `yum install libgsasl`
+
+* CentOS 7.7 does not appear to provide the glibc version we require, so binaries packaged on F30 won't work.
+
+```
+20/04/21 17:46:17 WARN TaskSetManager: Lost task 0.1 in stage 1.0 (TID 2, 10.0.0.143, executor 6): java.lang.UnsatisfiedLinkError: /tmp/libgandiva_jni.sobe729912-3bbe-4bd0-bb96-4c7ce2e62336: /lib64/libm.so.6: version `GLIBC_2.29' not found (required by /tmp/libgandiva_jni.sobe729912-3bbe-4bd0-bb96-4c7ce2e62336)
+```
+
+* Missing symbols due to old GCC version.
+
+```
+[root@vsr243 release-build]# nm /usr/local/lib64/libparquet.so | grep ZN5boost16re_detail_10710012perl_matcherIN9__gnu_cxx17__normal_iteratorIPKcSsEESaINS_9sub_matchIS6_EEENS_12regex_traitsIcNS_16cpp_regex_traitsIcEEEEE14construct_initERKNS_11basic_regexIcSD_EENS_15regex_constants12_match_flagsE
+_ZN5boost16re_detail_10710012perl_matcherIN9__gnu_cxx17__normal_iteratorIPKcSsEESaINS_9sub_matchIS6_EEENS_12regex_traitsIcNS_16cpp_regex_traitsIcEEEEE14construct_initERKNS_11basic_regexIcSD_EENS_15regex_constants12_match_flagsE
+```
+
+You need to compile all packages with a newer GCC:
+
+```
+[root@vsr243 ~]# export CXX=/usr/local/bin/g++
+[root@vsr243 ~]# export CC=/usr/local/bin/gcc
+```
+
+* Can not connect to hdfs @sr602
+
+vsr606, vsr243 are both not able to connect to hdfs @sr602, need to skipTests to generate the jar
+
diff --git a/docs/OAP-Developer-Guide.md b/docs/OAP-Developer-Guide.md
new file mode 100644
index 000000000..8d7ac6abf
--- /dev/null
+++ b/docs/OAP-Developer-Guide.md
@@ -0,0 +1,109 @@
+# OAP Developer Guide
+
+This document contains the instructions & scripts on installing necessary dependencies and building OAP. 
+You can get more detailed information from each OAP module below.
+
+* [SQL Index and Data Source Cache](https://github.com/oap-project/sql-ds-cache/blob/master/docs/Developer-Guide.md)
+* [PMem Common](https://github.com/oap-project/pmem-common)
+* [PMem Shuffle](https://github.com/oap-project/pmem-shuffle#5-install-dependencies-for-shuffle-remote-pmem-extension)
+* [Remote Shuffle](https://github.com/oap-project/remote-shuffle)
+* [OAP MLlib](https://github.com/oap-project/oap-mllib)
+* [Arrow Data Source](https://github.com/oap-project/arrow-data-source)
+* [Native SQL Engine](https://github.com/oap-project/native-sql-engine)
+
+## Building OAP
+
+### Prerequisites for Building
+
+OAP is built with [Apache Maven](http://maven.apache.org/) and Oracle Java 8, and mainly required tools to install on your cluster are listed below.
+
+- [Cmake](https://help.directadmin.com/item.php?id=494)
+- [GCC > 7](https://gcc.gnu.org/wiki/InstallingGCC)
+- [Memkind](https://github.com/memkind/memkind/tree/v1.10.1-rc2)
+- [Vmemcache](https://github.com/pmem/vmemcache)
+- [HPNL](https://github.com/Intel-bigdata/HPNL)
+- [PMDK](https://github.com/pmem/pmdk)  
+- [OneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html)
+- [Arrow](https://github.com/Intel-bigdata/arrow)
+
+- **Requirements for Shuffle Remote PMem Extension**  
+If enable Shuffle Remote PMem extension with RDMA, you can refer to [PMem Shuffle](https://github.com/oap-project/pmem-shuffle) to configure and validate RDMA in advance.
+
+We provide the scripts below to help automatically install the dependencies above **except RDMA**. You need to switch to the **root** account and run:
+
+```
+# git clone -b <tag-version> https://github.com/Intel-bigdata/OAP.git
+# cd OAP
+# sh $OAP_HOME/dev/install-compile-time-dependencies.sh
+```
+
+Run the following command to learn more.
+
+```
+# sh $OAP_HOME/dev/scripts/prepare_oap_env.sh --help
+```
+
+Run the following command to automatically install specific dependency such as Maven.
+
+```
+# sh $OAP_HOME/dev/scripts/prepare_oap_env.sh --prepare_maven
+```
+
+
+### Building
+
+To build OAP package, run command below then you can find a tarball named `oap-$VERSION-bin-spark-$VERSION.tar.gz` under directory `$OAP_HOME/dev/release-package `.
+```
+$ sh $OAP_HOME/dev/compile-oap.sh
+```
+
+Building Specified OAP Module, such as `oap-cache`, run:
+```
+$ sh $OAP_HOME/dev/compile-oap.sh --oap-cache
+```
+
+
+### Running OAP Unit Tests
+
+Set up the build environment manually for Intel MLlib. If your default GCC version is older than 7.0, you also need to export `CC` & `CXX` before using `mvn`. Run:
+
+```
+$ export CXX=$OAP_HOME/dev/thirdparty/gcc7/bin/g++
+$ export CC=$OAP_HOME/dev/thirdparty/gcc7/bin/gcc
+$ export ONEAPI_ROOT=/opt/intel/inteloneapi
+$ source /opt/intel/inteloneapi/daal/2021.1-beta07/env/vars.sh
+$ source /opt/intel/inteloneapi/tbb/2021.1-beta07/env/vars.sh
+$ source /tmp/oneCCL/build/_install/env/setvars.sh
+```
+
+Run all the tests:
+
+```
+$ mvn clean test
+```
+
+Run Specified OAP Module Unit Test, such as `oap-cache`:
+
+```
+$ mvn clean -pl com.intel.oap:oap-cache -am test
+
+```
+
+### Building SQL Index and Data Source Cache with PMem
+
+#### Prerequisites for building with PMem support
+
+When using SQL Index and Data Source Cache with PMem, finish steps of [Prerequisites for building](#prerequisites-for-building) to ensure needed dependencies have been installed.
+
+#### Building package
+
+You can build OAP with PMem support with command below:
+
+```
+$ sh $OAP_HOME/dev/compile-oap.sh
+```
+Or run:
+
+```
+$ mvn clean -q -Ppersistent-memory -Pvmemcache -DskipTests package
+```
diff --git a/docs/OAP-Installation-Guide.md b/docs/OAP-Installation-Guide.md
new file mode 100644
index 000000000..e3b229805
--- /dev/null
+++ b/docs/OAP-Installation-Guide.md
@@ -0,0 +1,69 @@
+# OAP Installation Guide
+This document introduces how to install OAP and its dependencies on your cluster nodes by ***Conda***. 
+Follow steps below on ***every node*** of your cluster to set right environment for each machine.
+
+## Contents
+  - [Prerequisites](#prerequisites)
+  - [Installing OAP](#installing-oap)
+  - [Configuration](#configuration)
+
+## Prerequisites 
+
+- **OS Requirements**  
+We have tested OAP on Fedora 29 and CentOS 7.6 (kernel-4.18.16). We recommend you use **Fedora 29, CentOS 7.6, or above**. Besides, for [Memkind](https://github.com/memkind/memkind/tree/v1.10.1-rc2) we recommend you use a **kernel above 3.10**.
+
+- **Conda Requirements**   
+Install Conda on your cluster nodes with the commands below and follow the prompts on the installer screens:
+```bash
+$ wget -c https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
+$ chmod +x Miniconda2-latest-Linux-x86_64.sh 
+$ bash Miniconda2-latest-Linux-x86_64.sh 
+```
+For changes to take effect, close and re-open your current shell. To test your installation,  run the command `conda list` in your terminal window. A list of installed packages appears if it has been installed correctly.
+
+## Installing OAP
+
+Dependencies below are required by OAP and all of them are included in OAP Conda package, they will be automatically installed in your cluster when you Conda install OAP. Ensure you have activated environment which you created in the previous steps.
+
+- [Arrow](https://github.com/Intel-bigdata/arrow)
+- [Plasma](http://arrow.apache.org/blog/2017/08/08/plasma-in-memory-object-store/)
+- [Memkind](https://anaconda.org/intel/memkind)
+- [Vmemcache](https://anaconda.org/intel/vmemcache)
+- [HPNL](https://anaconda.org/intel/hpnl)
+- [PMDK](https://github.com/pmem/pmdk)  
+- [OneAPI](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html)
+
+
+Create a conda environment and install OAP Conda package.
+```bash
+$ conda create -n oapenv -y python=3.7
+$ conda activate oapenv
+$ conda install -c conda-forge -c intel -y oap=1.0.0
+```
+
+Once you have finished the steps above, you have completed OAP dependencies installation and OAP building, and will find the built OAP jars under `$HOME/miniconda2/envs/oapenv/oap_jars`
+
+#### Extra Steps for Shuffle Remote PMem Extension
+
+If you use one of the OAP features -- [PMem Shuffle](https://github.com/oap-project/pmem-shuffle) with **RDMA**, you need to configure and validate RDMA. Please refer to [PMem Shuffle](https://github.com/oap-project/pmem-shuffle#4-configure-and-validate-rdma) for the details.
+
+
+##  Configuration
+
+Once you have finished the steps above, make sure the libraries installed by Conda can be linked by Spark. Please add the following configuration settings to `$SPARK_HOME/conf/spark-defaults.conf`.
+
+```
+spark.executorEnv.LD_LIBRARY_PATH   $HOME/miniconda2/envs/oapenv/lib
+spark.executor.extraLibraryPath     $HOME/miniconda2/envs/oapenv/lib
+spark.driver.extraLibraryPath       $HOME/miniconda2/envs/oapenv/lib
+spark.executor.extraClassPath       $HOME/miniconda2/envs/oapenv/oap_jars/$OAP_FEATURE.jar
+spark.driver.extraClassPath         $HOME/miniconda2/envs/oapenv/oap_jars/$OAP_FEATURE.jar
+```
+
+And then you can follow the corresponding feature documents for more details to use them.
+
+
+
+
+
+
diff --git a/docs/Prerequisite.md b/docs/Prerequisite.md
new file mode 100644
index 000000000..5ff82aa1b
--- /dev/null
+++ b/docs/Prerequisite.md
@@ -0,0 +1,151 @@
+# Prerequisite
+
+There are some requirements before you build the project.
+Please make sure you have already installed the software in your system.
+
+1. gcc 9.3 or higher version
+2. java8 OpenJDK -> yum install java-1.8.0-openjdk
+3. cmake 3.2 or higher version
+4. maven 3.1.1 or higher version
+5. Hadoop 2.7.5 or higher version
+6. Spark 3.0.0 or higher version
+7. Intel Optimized Arrow 0.17.0
+
+## gcc installation
+
+// installing gcc 9.3 or higher version
+
+Please note that for better performance, gcc 9.3 is the minimum requirement on Intel microarchitectures such as SKYLAKE, CASCADELAKE, and ICELAKE.
+https://gcc.gnu.org/install/index.html
+
+Follow the above website to download gcc.
+C++ library may ask a certain version, if you are using gcc 9.3 the version would be libstdc++.so.6.0.28.
+You may have to launch ./contrib/download_prerequisites command to install all the prerequisites for gcc.
+If you are facing downloading issue in download_prerequisites command, you can try to change ftp to http.
+
+//Follow the steps to configure gcc
+https://gcc.gnu.org/install/configure.html
+
+If you are facing a multilib issue, you can try to add --disable-multilib parameter in ../configure
+
+//Follow the steps to build gcc
+https://gcc.gnu.org/install/build.html
+
+//Follow the steps to install gcc
+https://gcc.gnu.org/install/finalinstall.html
+
+//Set up Environment for new gcc
+```
+export PATH=$YOUR_GCC_INSTALLATION_DIR/bin:$PATH
+export LD_LIBRARY_PATH=$YOUR_GCC_INSTALLATION_DIR/lib64:$LD_LIBRARY_PATH
+```
+Please remember to add and source the setup in your environment files such as /etc/profile or /etc/bashrc
+
+//Verify that gcc has been installed
+Use the gcc -v command to verify that your gcc version is correct (it must be 9.3 or higher).
+
+## cmake installation
+If you are facing some trouble when installing cmake, please follow below steps to install cmake.
+
+```
+// installing cmake 3.2
+sudo yum install cmake3
+
+// If you have an existing cmake, you can use below command to set it as an option within alternatives command
+sudo alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake 10 --slave /usr/local/bin/ctest ctest /usr/bin/ctest --slave /usr/local/bin/cpack cpack /usr/bin/cpack --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake --family cmake
+
+// Set cmake3 as an option within alternatives command
+sudo alternatives --install /usr/local/bin/cmake cmake /usr/bin/cmake3 20 --slave /usr/local/bin/ctest ctest /usr/bin/ctest3 --slave /usr/local/bin/cpack cpack /usr/bin/cpack3 --slave /usr/local/bin/ccmake ccmake /usr/bin/ccmake3 --family cmake
+
+// Use alternatives to choose cmake version
+sudo alternatives --config cmake
+```
+
+## maven installation
+
+If you are facing some trouble when installing maven, please follow below steps to install maven
+
+// installing maven 3.6.3
+
+Go to https://maven.apache.org/download.cgi and download the specific version of maven
+
+// Below command use maven 3.6.3 as an example
+```
+wget htps://ftp.wayne.edu/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz
+wget https://ftp.wayne.edu/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz
+tar xzf apache-maven-3.6.3-bin.tar.gz
+mkdir /usr/local/maven
+mv apache-maven-3.6.3/ /usr/local/maven/
+```
+
+// Set maven 3.6.3 as an option within alternatives command
+```
+sudo alternatives --install /usr/bin/mvn mvn /usr/local/maven/apache-maven-3.6.3/bin/mvn 1
+```
+
+// Use alternatives to choose mvn version
+
+```
+sudo alternatives --config mvn
+```
+
+## HADOOP/SPARK Installation
+
+If there is no existing Hadoop/Spark installed, Please follow the guide to install your Hadoop/Spark [SPARK/HADOOP Installation](./SparkInstallation.md)
+
+### Hadoop Native Library(Default)
+
+Please make sure you have set up Hadoop directory properly with Hadoop Native Libraries
+By default, Apache Arrow would scan `$HADOOP_HOME` and find the native Hadoop library `libhdfs.so`(under `$HADOOP_HOME/lib/native` directory) to be used for Hadoop client.
+
+You can also use `ARROW_LIBHDFS_DIR` to configure the location of `libhdfs.so` if it is installed in other directory than `$HADOOP_HOME/lib/native`
+
+If your SPARK and HADOOP are separated in different nodes, please find `libhdfs.so` in your Hadoop cluster and copy it to SPARK cluster, then use one of the above methods to set it properly.
+
+For more information, please check
+Arrow HDFS interface [documentation](https://github.com/apache/arrow/blob/master/cpp/apidoc/HDFS.md)
+Hadoop Native Library, please read the official Hadoop website [documentation](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/NativeLibraries.html)
+
+### Use libhdfs3 library for better performance(Optional)
+
+For better performance ArrowDataSource reads HDFS files using the third-party library `libhdfs3`. The library must be pre-installed on machines Spark Executor nodes are running on.
+
+To install the library, use of [Conda](https://docs.conda.io/en/latest/) is recommended.
+
+```
+// installing libhdfs3
+conda install -c conda-forge libhdfs3
+
+// check the installed library file
+ll ~/miniconda/envs/${YOUR_ENV_NAME}/lib/libhdfs3.so
+```
+
+We also provide a libhdfs3 binary in cpp/src/resources directory.
+
+To set up libhdfs3, there are two different ways:
+Option1: Overwrite the soft link for libhdfs.so
+To install libhdfs3.so, you have to create a soft link for libhdfs.so in your Hadoop directory(`$HADOOP_HOME/lib/native` by default).
+
+```
+ln -f -s libhdfs3.so libhdfs.so
+```
+
+Option2:
+Add env variable to the system
+```
+export ARROW_LIBHDFS3_DIR="PATH_TO_LIBHDFS3_DIR/"
+```
+
+Add following Spark configuration options before running the DataSource to make the library to be recognized:
+
+* `spark.executorEnv.ARROW_LIBHDFS3_DIR = "PATH_TO_LIBHDFS3_DIR/"`
+* `spark.executorEnv.LD_LIBRARY_PATH = "PATH_TO_LIBHDFS3_DEPENDENCIES_DIR/"`
+
+Please note: If you choose to use libhdfs3.so, there are some other dependency libraries you have to install, such as libprotobuf or libcrypto.
+
+
+## Intel Optimized Apache Arrow Installation
+
+Intel Optimized Apache Arrow is MANDATORY. However, we bundle compiled Arrow libraries (libarrow, libgandiva, libparquet) built by GCC 9.3 in the cpp/src/resources directory.
+If you wish to build Apache Arrow by yourself, please follow the guide to build and install Apache Arrow [ArrowInstallation](./ApacheArrowInstallation.md)
+
diff --git a/docs/SparkInstallation.md b/docs/SparkInstallation.md
new file mode 100644
index 000000000..9d2a864ae
--- /dev/null
+++ b/docs/SparkInstallation.md
@@ -0,0 +1,44 @@
+### Download Spark 3.0.1
+
+Currently Native SQL Engine works on the Spark 3.0.1 version.
+
+```
+wget http://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz
+sudo mkdir -p /opt/spark && sudo mv spark-3.0.1-bin-hadoop3.2.tgz /opt/spark
+cd /opt/spark && sudo tar -xf spark-3.0.1-bin-hadoop3.2.tgz
+export SPARK_HOME=/opt/spark/spark-3.0.1-bin-hadoop3.2/
+```
+
+### [Or building Spark from source](https://spark.apache.org/docs/latest/building-spark.html)
+
+``` shell
+git clone https://github.com/intel-bigdata/spark.git
+cd spark && git checkout native-sql-engine-clean
+# check spark supported hadoop version
+grep \<hadoop\.version\> -r pom.xml
+    <hadoop.version>2.7.4</hadoop.version>
+    <hadoop.version>3.2.0</hadoop.version>
+# so we should build spark specifying hadoop version as 3.2
+./build/mvn -Pyarn -Phadoop-3.2 -Dhadoop.version=3.2.0 -DskipTests clean install
+```
+Specify SPARK_HOME to spark path
+
+``` shell
+export SPARK_HOME=${HADOOP_PATH}
+```
+
+### Hadoop building from source
+
+``` shell
+git clone https://github.com/apache/hadoop.git
+cd hadoop
+git checkout rel/release-3.2.0
+# only build binary for hadoop
+mvn clean install -Pdist -DskipTests -Dtar
+# build binary and native library such as libhdfs.so for hadoop
+# mvn clean install -Pdist,native -DskipTests -Dtar
+```
+
+``` shell
+export HADOOP_HOME=${HADOOP_PATH}/hadoop-dist/target/hadoop-3.2.0/
+```
diff --git a/docs/User-Guide.md b/docs/User-Guide.md
new file mode 100644
index 000000000..c3c05cebf
--- /dev/null
+++ b/docs/User-Guide.md
@@ -0,0 +1,118 @@
+# Spark Native SQL Engine
+
+A Native Engine for Spark SQL with vectorized SIMD optimizations
+
+## Introduction
+
+![Overview](./image/nativesql_arch.png)
+
+Spark SQL works very well with structured row-based data. It uses WholeStageCodeGen to improve performance through Java JIT-compiled code. However, Java JIT usually does not make good use of the latest SIMD instructions, especially for complicated queries. [Apache Arrow](https://arrow.apache.org/) provides a CPU-cache-friendly columnar in-memory layout, and its SIMD-optimized kernels and LLVM-based SQL engine Gandiva are also very efficient. Native SQL Engine uses these technologies to bring better performance to Spark SQL.
+
+## Key Features
+
+### Apache Arrow formatted intermediate data among Spark operator
+
+![Overview](./image/columnar.png)
+
+With [Spark 27396](https://issues.apache.org/jira/browse/SPARK-27396) it's possible to pass an RDD of ColumnarBatch to operators. We implemented this API with the Arrow columnar format.
+
+### Apache Arrow based Native Readers for Parquet and other formats
+
+![Overview](./image/dataset.png)
+
+A native Parquet reader was developed to speed up data loading. It is based on Apache Arrow Dataset. For details, please check [Arrow Data Source](https://github.com/oap-project/arrow-data-source).
+
+### Apache Arrow Compute/Gandiva based operators
+
+![Overview](./image/kernel.png)
+
+We implemented common operators based on Apache Arrow Compute and Gandiva. The SQL expression is compiled to one expression tree with protobuf and passed to native kernels. The native kernels then evaluate these expressions based on the input columnar batch.
+
+### Native Columnar Shuffle Operator with efficient compression support
+
+![Overview](./image/shuffle.png)
+
+We implemented columnar shuffle to improve shuffle performance. With the columnar layout we can do very efficient data compression for different data formats.
+
+## Build the Plugin
+
+### Building by Conda
+
+If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./OAP-Installation-Guide.md), you can find built `spark-columnar-core-<version>-jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`.
+Then you can skip the steps below and jump to [Get Started](#get-started).
+
+### Building by yourself
+
+If you prefer to build from the source code on your hand, please follow below steps to set up your environment.
+
+### Prerequisite
+There are some requirements before you build the project.
+Please check the document [Prerequisite](./Prerequisite.md) and make sure you have already installed the software in your system.
+If you are running a SPARK Cluster, please make sure all the software are installed in every single node.
+
+### Installation
+Please check the document [Installation Guide](./Installation.md) 
+
+### Configuration & Testing 
+Please check the document [Configuration Guide](./Configuration.md)
+
+## Get started
+To enable OAP NativeSQL Engine, the previous built jar `spark-columnar-core-<version>-jar-with-dependencies.jar` should be added to Spark configuration. We also recommend to use `spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar`. We will demonstrate an example by using both jar files.
+SPARK related options are:
+
+* `spark.driver.extraClassPath` : Set to load jar file to driver.
+* `spark.executor.extraClassPath` : Set to load jar file to executor.
+* `jars` : Set to copy jar file to the executors when using yarn cluster mode.
+* `spark.executorEnv.ARROW_LIBHDFS3_DIR` : Optional if you are using a custom libhdfs3.so.
+* `spark.executorEnv.LD_LIBRARY_PATH` : Optional if you are using a custom libhdfs3.so.
+
+For Spark Standalone Mode, please set the above value as relative path to the jar file.
+For Spark Yarn Cluster Mode, please set the above value as absolute path to the jar file.
+
+Example to run Spark Shell with ArrowDataSource jar file
+```
+${SPARK_HOME}/bin/spark-shell \
+        --verbose \
+        --master yarn \
+        --driver-memory 10G \
+        --conf spark.driver.extraClassPath=$PATH_TO_JAR/spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar:$PATH_TO_JAR/spark-columnar-core-<version>-jar-with-dependencies.jar \
+        --conf spark.executor.extraClassPath=$PATH_TO_JAR/spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar:$PATH_TO_JAR/spark-columnar-core-<version>-jar-with-dependencies.jar \
+        --conf spark.driver.cores=1 \
+        --conf spark.executor.instances=12 \
+        --conf spark.executor.cores=6 \
+        --conf spark.executor.memory=20G \
+        --conf spark.memory.offHeap.size=80G \
+        --conf spark.task.cpus=1 \
+        --conf spark.locality.wait=0s \
+        --conf spark.sql.shuffle.partitions=72 \
+        --conf spark.executorEnv.ARROW_LIBHDFS3_DIR="$PATH_TO_LIBHDFS3_DIR/" \
+        --conf spark.executorEnv.LD_LIBRARY_PATH="$PATH_TO_LIBHDFS3_DEPENDENCIES_DIR" \
+        --jars $PATH_TO_JAR/spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar,$PATH_TO_JAR/spark-columnar-core-<version>-jar-with-dependencies.jar
+```
+
+Here is one example to verify if native sql engine works, make sure you have TPC-H dataset.  We could do a simple projection on one parquet table. For detailed testing scripts, please refer to [Solution Guide](https://github.com/Intel-bigdata/Solution_navigator/tree/master/nativesql).
+```
+val orders = spark.read.format("arrow").load("hdfs:////user/root/date_tpch_10/orders")
+orders.createOrReplaceTempView("orders")
+spark.sql("select * from orders where o_orderdate > date '1998-07-26'").show(20000, false)
+```
+
+The result should show up on Spark console and you can check the DAG diagram with some Columnar Processing stage.
+
+
+## Performance data
+
+For the initial microbenchmark, we sum up 10 fields with Spark on a 200 GB dataset.
+
+![Performance](./image/performance.png)
+
+## Coding Style
+
+* For Java code, we used [google-java-format](https://github.com/google/google-java-format)
+* For Scala code, we used [Spark Scala Format](https://github.com/apache/spark/blob/master/dev/.scalafmt.conf), please use [scalafmt](https://github.com/scalameta/scalafmt) or run ./scalafmt for scala codes format
+* For Cpp codes, we used Clang-Format, check on this link [google-vim-codefmt](https://github.com/google/vim-codefmt) for details.
+
+## Contact
+
+chendi.xue@intel.com
+binwei.yang@intel.com
diff --git a/docs/image/columnar.png b/docs/image/columnar.png
new file mode 100644
index 000000000..d89074905
Binary files /dev/null and b/docs/image/columnar.png differ
diff --git a/docs/image/core_arch.jpg b/docs/image/core_arch.jpg
new file mode 100644
index 000000000..4f732a4ff
Binary files /dev/null and b/docs/image/core_arch.jpg differ
diff --git a/docs/image/dataset.png b/docs/image/dataset.png
new file mode 100644
index 000000000..5d3e607ab
Binary files /dev/null and b/docs/image/dataset.png differ
diff --git a/docs/image/decision_support_bench1_result_by_query.png b/docs/image/decision_support_bench1_result_by_query.png
new file mode 100644
index 000000000..af1c67e8d
Binary files /dev/null and b/docs/image/decision_support_bench1_result_by_query.png differ
diff --git a/docs/image/decision_support_bench1_result_in_total.png b/docs/image/decision_support_bench1_result_in_total.png
new file mode 100644
index 000000000..9674abc9a
Binary files /dev/null and b/docs/image/decision_support_bench1_result_in_total.png differ
diff --git a/docs/image/decision_support_bench2_result_by_query.png b/docs/image/decision_support_bench2_result_by_query.png
new file mode 100644
index 000000000..4578dd307
Binary files /dev/null and b/docs/image/decision_support_bench2_result_by_query.png differ
diff --git a/docs/image/decision_support_bench2_result_in_total.png b/docs/image/decision_support_bench2_result_in_total.png
new file mode 100644
index 000000000..88db8f768
Binary files /dev/null and b/docs/image/decision_support_bench2_result_in_total.png differ
diff --git a/docs/image/kernel.png b/docs/image/kernel.png
new file mode 100644
index 000000000..f88b002aa
Binary files /dev/null and b/docs/image/kernel.png differ
diff --git a/docs/image/nativesql_arch.png b/docs/image/nativesql_arch.png
new file mode 100644
index 000000000..a8304f5af
Binary files /dev/null and b/docs/image/nativesql_arch.png differ
diff --git a/docs/image/performance.png b/docs/image/performance.png
new file mode 100644
index 000000000..a4351cd9a
Binary files /dev/null and b/docs/image/performance.png differ
diff --git a/docs/image/shuffle.png b/docs/image/shuffle.png
new file mode 100644
index 000000000..504234536
Binary files /dev/null and b/docs/image/shuffle.png differ
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 000000000..a0662883f
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,118 @@
+# Spark Native SQL Engine
+
+A Native Engine for Spark SQL with vectorized SIMD optimizations
+
+## Introduction
+
+![Overview](./image/nativesql_arch.png)
+
+Spark SQL works very well with structured row-based data. It uses WholeStageCodeGen to improve performance through Java JIT-compiled code. However, Java JIT usually does not make good use of the latest SIMD instructions, especially for complicated queries. [Apache Arrow](https://arrow.apache.org/) provides a CPU-cache-friendly columnar in-memory layout, and its SIMD-optimized kernels and LLVM-based SQL engine Gandiva are also very efficient. Native SQL Engine uses these technologies to bring better performance to Spark SQL.
+
+## Key Features
+
+### Apache Arrow formatted intermediate data among Spark operator
+
+![Overview](./image/columnar.png)
+
+With [Spark 27396](https://issues.apache.org/jira/browse/SPARK-27396) it's possible to pass an RDD of ColumnarBatch to operators. We implemented this API with the Arrow columnar format.
+
+### Apache Arrow based Native Readers for Parquet and other formats
+
+![Overview](./image/dataset.png)
+
+A native Parquet reader was developed to speed up data loading. It is based on Apache Arrow Dataset. For details, please check [Arrow Data Source](https://github.com/oap-project/arrow-data-source).
+
+### Apache Arrow Compute/Gandiva based operators
+
+![Overview](./image/kernel.png)
+
+We implemented common operators based on Apache Arrow Compute and Gandiva. The SQL expression is compiled to one expression tree with protobuf and passed to native kernels. The native kernels then evaluate these expressions based on the input columnar batch.
+
+### Native Columnar Shuffle Operator with efficient compression support
+
+![Overview](./image/shuffle.png)
+
+We implemented columnar shuffle to improve shuffle performance. With the columnar layout we can do very efficient data compression for different data formats.
+
+## Build the Plugin
+
+### Building by Conda
+
+If you already have a working Hadoop Spark Cluster, we provide a Conda package which will automatically install dependencies needed by OAP, you can refer to [OAP-Installation-Guide](./OAP-Installation-Guide.md) for more information. Once finished [OAP-Installation-Guide](./OAP-Installation-Guide.md), you can find built `spark-columnar-core-1.0.0-jar-with-dependencies.jar` under `$HOME/miniconda2/envs/oapenv/oap_jars`.
+Then you can skip the steps below and jump to [Get Started](#get-started).
+
+### Building by yourself
+
+If you prefer to build from the source code on your hand, please follow below steps to set up your environment.
+
+### Prerequisite
+There are some requirements before you build the project.
+Please check the document [Prerequisite](./Prerequisite.md) and make sure you have already installed the software in your system.
+If you are running a SPARK Cluster, please make sure all the software are installed in every single node.
+
+### Installation
+Please check the document [Installation Guide](./Installation.md) 
+
+### Configuration & Testing 
+Please check the document [Configuration Guide](./Configuration.md)
+
+## Get started
+To enable OAP NativeSQL Engine, the previous built jar `spark-columnar-core-<version>-jar-with-dependencies.jar` should be added to Spark configuration. We also recommend to use `spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar`. We will demonstrate an example by using both jar files.
+SPARK related options are:
+
+* `spark.driver.extraClassPath` : Set to load jar file to driver.
+* `spark.executor.extraClassPath` : Set to load jar file to executor.
+* `jars` : Set to copy jar file to the executors when using yarn cluster mode.
+* `spark.executorEnv.ARROW_LIBHDFS3_DIR` : Optional if you are using a custom libhdfs3.so.
+* `spark.executorEnv.LD_LIBRARY_PATH` : Optional if you are using a custom libhdfs3.so.
+
+For Spark Standalone Mode, please set the above value as relative path to the jar file.
+For Spark Yarn Cluster Mode, please set the above value as absolute path to the jar file.
+
+Example to run Spark Shell with ArrowDataSource jar file
+```
+${SPARK_HOME}/bin/spark-shell \
+        --verbose \
+        --master yarn \
+        --driver-memory 10G \
+        --conf spark.driver.extraClassPath=$PATH_TO_JAR/spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar:$PATH_TO_JAR/spark-columnar-core-<version>-jar-with-dependencies.jar \
+        --conf spark.executor.extraClassPath=$PATH_TO_JAR/spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar:$PATH_TO_JAR/spark-columnar-core-<version>-jar-with-dependencies.jar \
+        --conf spark.driver.cores=1 \
+        --conf spark.executor.instances=12 \
+        --conf spark.executor.cores=6 \
+        --conf spark.executor.memory=20G \
+        --conf spark.memory.offHeap.size=80G \
+        --conf spark.task.cpus=1 \
+        --conf spark.locality.wait=0s \
+        --conf spark.sql.shuffle.partitions=72 \
+        --conf spark.executorEnv.ARROW_LIBHDFS3_DIR="$PATH_TO_LIBHDFS3_DIR/" \
+        --conf spark.executorEnv.LD_LIBRARY_PATH="$PATH_TO_LIBHDFS3_DEPENDENCIES_DIR" \
+        --jars $PATH_TO_JAR/spark-arrow-datasource-standard-<version>-jar-with-dependencies.jar,$PATH_TO_JAR/spark-columnar-core-<version>-jar-with-dependencies.jar
+```
+
+Here is one example to verify if native sql engine works, make sure you have TPC-H dataset.  We could do a simple projection on one parquet table. For detailed testing scripts, please refer to [Solution Guide](https://github.com/Intel-bigdata/Solution_navigator/tree/master/nativesql).
+```
+val orders = spark.read.format("arrow").load("hdfs:////user/root/date_tpch_10/orders")
+orders.createOrReplaceTempView("orders")
+spark.sql("select * from orders where o_orderdate > date '1998-07-26'").show(20000, false)
+```
+
+The result should show up on Spark console and you can check the DAG diagram with some Columnar Processing stage.
+
+
+## Performance data
+
+For the initial microbenchmark, we sum up 10 fields with Spark on a 200 GB dataset.
+
+![Performance](./image/performance.png)
+
+## Coding Style
+
+* For Java code, we used [google-java-format](https://github.com/google/google-java-format)
+* For Scala code, we used [Spark Scala Format](https://github.com/apache/spark/blob/master/dev/.scalafmt.conf), please use [scalafmt](https://github.com/scalameta/scalafmt) or run ./scalafmt for scala codes format
+* For Cpp codes, we used Clang-Format, check on this link [google-vim-codefmt](https://github.com/google/vim-codefmt) for details.
+
+## Contact
+
+chendi.xue@intel.com
+binwei.yang@intel.com
diff --git a/docs/limitation.md b/docs/limitation.md
new file mode 100644
index 000000000..a4b66f5e1
--- /dev/null
+++ b/docs/limitation.md
@@ -0,0 +1,17 @@
+# Limitations for Native SQL Engine
+
+## Spark compatibility
+Native SQL engine currently works with Spark 3.0.0 only. There is still some trouble with the latest Shuffle/AQE API from Spark 3.0.1, 3.0.2 or 3.1.x.
+
+## Operator limitations
+All performance critical operators in TPC-H/TPC-DS should be supported. For those unsupported operators, Native SQL engine will automatically fallback to row operators in vanilla Spark.
+
+### Columnar Projection with Filter
+We use a 16-bit selection vector for filtering, so the max batch size needs to be < 65536.
+
+### Columnar Sort
+Columnar Sort does not support spilling to disk yet. To reduce peak memory usage, we use a smaller data type (uint16_t), which limits:
+- the max batch size to be < 65536
+- the number of batches in one partition to be < 65536
+
+
diff --git a/native-sql-engine/core/pom.xml b/native-sql-engine/core/pom.xml
index 6fd02b5ff..f36296027 100644
--- a/native-sql-engine/core/pom.xml
+++ b/native-sql-engine/core/pom.xml
@@ -36,6 +36,11 @@
     <hive.parquet.group>com.twitter</hive.parquet.group>
     <parquet.deps.scope>provided</parquet.deps.scope>
     <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
+    <cpp_tests>OFF</cpp_tests>
+    <build_arrow>ON</build_arrow>
+    <static_arrow>OFF</static_arrow>
+    <build_protobuf>ON</build_protobuf>
+    <arrow_root>/usr/local</arrow_root>
   </properties>
   <dependencies>
     <!-- Prevent our dummy JAR from being included in Spark distributions or uploaded to YARN -->
@@ -296,7 +301,15 @@
                       <goal>exec</goal>
                   </goals>
                   <configuration>
-                      <executable>${cpp.dir}/compile.sh</executable>
+                      <executable>bash</executable>
+                      <arguments>
+                          <argument>${cpp.dir}/compile.sh</argument>
+                          <argument>${cpp_tests}</argument>
+                          <argument>${build_arrow}</argument>
+                          <argument>${static_arrow}</argument>
+                          <argument>${build_protobuf}</argument>
+                          <argument>${arrow_root}</argument>
+                      </arguments>
                   </configuration>
               </execution>
           </executions>
diff --git a/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java b/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java
index e65c4b39e..cd5f774dc 100644
--- a/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java
+++ b/native-sql-engine/core/src/main/java/com/intel/oap/vectorized/JniUtils.java
@@ -39,6 +39,8 @@
 /** Helper class for JNI related operations. */
 public class JniUtils {
   private static final String LIBRARY_NAME = "spark_columnar_jni";
+  private static final String ARROW_LIBRARY_NAME = "libarrow.so.300";
+  private static final String GANDIVA_LIBRARY_NAME = "libgandiva.so.300";
   private static boolean isLoaded = false;
   private static boolean isCodegenDependencyLoaded = false;
   private static List<String> codegenJarsLoadedCache = new ArrayList<>();
@@ -77,6 +79,8 @@ private JniUtils(String _tmp_dir) throws IOException, IllegalAccessException, Il
       try {
         loadLibraryFromJar(tmp_dir);
       } catch (IOException ex) {
+        System.load(ARROW_LIBRARY_NAME);
+        System.load(GANDIVA_LIBRARY_NAME);
         System.loadLibrary(LIBRARY_NAME);
       }
       isLoaded = true;
@@ -108,6 +112,10 @@ static void loadLibraryFromJar(String tmp_dir) throws IOException, IllegalAccess
       if (tmp_dir == null) {
         tmp_dir = System.getProperty("java.io.tmpdir");
       }
+      final File arrowlibraryFile = moveFileFromJarToTemp(tmp_dir, ARROW_LIBRARY_NAME);
+      System.load(arrowlibraryFile.getAbsolutePath());
+      final File gandivalibraryFile = moveFileFromJarToTemp(tmp_dir, GANDIVA_LIBRARY_NAME);
+      System.load(gandivalibraryFile.getAbsolutePath());
       final String libraryToLoad = System.mapLibraryName(LIBRARY_NAME);
       final File libraryFile = moveFileFromJarToTemp(tmp_dir, libraryToLoad);
       System.load(libraryFile.getAbsolutePath());
diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarShuffledHashJoinExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarShuffledHashJoinExec.scala
index ec67e8948..2d372c836 100644
--- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarShuffledHashJoinExec.scala
+++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarShuffledHashJoinExec.scala
@@ -92,6 +92,22 @@ case class ColumnarShuffledHashJoinExec(
     }
   }
 
+  val builder_type = {
+    if (condition.isDefined) 1
+    else {
+      joinType match {
+        case LeftSemi =>
+          3
+        case LeftAnti =>
+          3
+        case j: ExistenceJoin =>
+          3
+        case other =>
+          1
+      }
+    }
+  }
+
   def buildCheck(): Unit = {
     // build check for condition
     val conditionExpr: Expression = condition.orNull
@@ -180,7 +196,8 @@ case class ColumnarShuffledHashJoinExec(
     ColumnarCodegenContext(
       inputSchema,
       null,
-      ColumnarConditionedProbeJoin.prepareHashBuildFunction(buildKeyExprs, buildPlan.output, 1))
+      ColumnarConditionedProbeJoin
+        .prepareHashBuildFunction(buildKeyExprs, buildPlan.output, builder_type))
   }
 
   override def supportColumnarCodegen: Boolean = true
@@ -256,7 +273,7 @@ case class ColumnarShuffledHashJoinExec(
       val hashRelationBatchHolder: ListBuffer[ColumnarBatch] = ListBuffer()
       val hash_relation_function =
         ColumnarConditionedProbeJoin
-          .prepareHashBuildFunction(buildKeyExprs, buildPlan.output, 1)
+          .prepareHashBuildFunction(buildKeyExprs, buildPlan.output, builder_type)
       val hash_relation_schema = ConverterUtils.toArrowSchema(buildPlan.output)
       val hash_relation_expr =
         TreeBuilder.makeExpression(
diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWindowExec.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWindowExec.scala
index e59a732dd..bd0d7cd9c 100644
--- a/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWindowExec.scala
+++ b/native-sql-engine/core/src/main/scala/com/intel/oap/execution/ColumnarWindowExec.scala
@@ -27,7 +27,7 @@ import org.apache.arrow.gandiva.expression.TreeBuilder
 import org.apache.arrow.vector.types.pojo.{ArrowType, Field, FieldType, Schema}
 import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Attribute, AttributeReference, Cast, Descending, Expression, MakeDecimal, NamedExpression, Rank, SortOrder, UnscaledValue, WindowExpression, WindowFunction}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Attribute, AttributeReference, Cast, Descending, Expression, MakeDecimal, NamedExpression, Rank, SortOrder, UnscaledValue, WindowExpression, WindowFunction, WindowSpecDefinition}
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, Average, Sum}
 import org.apache.spark.sql.execution.window.WindowExec
 import org.apache.spark.sql.execution.SparkPlan
@@ -71,45 +71,65 @@ class ColumnarWindowExec(windowExpression: Seq[NamedExpression],
   val sparkConf = sparkContext.getConf
   val numaBindingInfo = ColumnarPluginConfig.getConf.numaBindingInfo
 
+  def checkAggFunctionSpec(windowSpec: WindowSpecDefinition): Unit = {
+    if (windowSpec.orderSpec.nonEmpty) {
+      throw new UnsupportedOperationException("unsupported operation for " +
+          "aggregation window function: " + windowSpec)
+    }
+  }
+
+  def checkRankSpec(windowSpec: WindowSpecDefinition): Unit = {
+    // leave it empty for now
+  }
+
   val windowFunctions: Seq[(String, Expression)] = windowExpression
       .map(e => e.asInstanceOf[Alias])
       .map(a => a.child.asInstanceOf[WindowExpression])
-      .map(w => w.windowFunction)
+      .map(w => (w, w.windowFunction))
       .map {
-        case a: AggregateExpression => a.aggregateFunction
-        case b: WindowFunction => b
-        case f =>
-          throw new UnsupportedOperationException("unsupported window function type: " +
-              f)
+        case (expr, func) =>
+          (expr, func match {
+            case a: AggregateExpression => a.aggregateFunction
+            case b: WindowFunction => b
+            case f =>
+              throw new UnsupportedOperationException("unsupported window function type: " +
+                  f)
+          })
       }
-      .map { f =>
-        val name = f match {
-          case _: Sum => "sum"
-          case _: Average => "avg"
-          case _: Rank =>
-            val desc: Option[Boolean] = orderSpec.foldLeft[Option[Boolean]](None) {
-              (desc, s) =>
-                val currentDesc = s.direction match {
-                  case Ascending => false
-                  case Descending => true
-                  case _ => throw new IllegalStateException
-                }
-                if (desc.isEmpty) {
-                  Some(currentDesc)
-                } else if (currentDesc == desc.get) {
-                  Some(currentDesc)
-                } else {
-                  throw new UnsupportedOperationException("Rank: clashed rank order found")
-                }
-            }
-            desc match {
-              case Some(true) => "rank_desc"
-              case Some(false) => "rank_asc"
-              case None => "rank_asc"
-            }
-          case f => throw new UnsupportedOperationException("unsupported window function: " + f)
-        }
-        (name, f)
+      .map {
+        case (expr, func) =>
+          val name = func match {
+            case _: Sum =>
+              checkAggFunctionSpec(expr.windowSpec)
+              "sum"
+            case _: Average =>
+              checkAggFunctionSpec(expr.windowSpec)
+              "avg"
+            case _: Rank =>
+              checkRankSpec(expr.windowSpec)
+              val desc: Option[Boolean] = orderSpec.foldLeft[Option[Boolean]](None) {
+                (desc, s) =>
+                  val currentDesc = s.direction match {
+                    case Ascending => false
+                    case Descending => true
+                    case _ => throw new IllegalStateException
+                  }
+                  if (desc.isEmpty) {
+                    Some(currentDesc)
+                  } else if (currentDesc == desc.get) {
+                    Some(currentDesc)
+                  } else {
+                    throw new UnsupportedOperationException("Rank: clashed rank order found")
+                  }
+              }
+              desc match {
+                case Some(true) => "rank_desc"
+                case Some(false) => "rank_asc"
+                case None => "rank_asc"
+              }
+            case f => throw new UnsupportedOperationException("unsupported window function: " + f)
+          }
+          (name, func)
       }
 
   if (windowFunctions.isEmpty) {
diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarArithmetic.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarArithmetic.scala
index 510cb2efe..db8b4c2f1 100644
--- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarArithmetic.scala
+++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarArithmetic.scala
@@ -41,7 +41,8 @@ class ColumnarAdd(left: Expression, right: Expression, original: Expression)
     with Logging {
 
   // If casting between DecimalType, unnecessary cast is skipped to avoid data loss,
-  // because res type of "cast" is actually the res type of "add/subtract".
+  // because the result type of "cast" is actually the result type of "add/subtract",
+  // and is the wider type for "multiply/divide".
   val left_val: Any = left match {
     case c: ColumnarCast =>
       if (c.child.dataType.isInstanceOf[DecimalType] &&
@@ -162,18 +163,41 @@ class ColumnarMultiply(left: Expression, right: Expression, original: Expression
     with ColumnarExpression
     with Logging {
 
+  val left_val: Any = left match {
+    case c: ColumnarCast =>
+      if (c.child.dataType.isInstanceOf[DecimalType] &&
+        c.dataType.isInstanceOf[DecimalType]) {
+        c.child
+      } else {
+        left
+      }
+    case _ =>
+      left
+  }
+  val right_val: Any = right match {
+    case c: ColumnarCast =>
+      if (c.child.dataType.isInstanceOf[DecimalType] &&
+        c.dataType.isInstanceOf[DecimalType]) {
+        c.child
+      } else {
+        right
+      }
+    case _ =>
+      right
+  }
+
   override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
     var (left_node, left_type): (TreeNode, ArrowType) =
-      left.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+      left_val.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
     var (right_node, right_type): (TreeNode, ArrowType) =
-      right.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+      right_val.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
 
     (left_type, right_type) match {
       case (l: ArrowType.Decimal, r: ArrowType.Decimal) =>
         var resultType = DecimalTypeUtil.getResultTypeForOperation(
           DecimalTypeUtil.OperationType.MULTIPLY, l, r)
         // Scaling down the unnecessary scale for Literal to avoid precision loss
-        val newLeftNode = left match {
+        val newLeftNode = left_val match {
           case literal: ColumnarLiteral =>
             val leftStr = literal.value.asInstanceOf[Decimal].toDouble.toString
             val newLeftPrecision = leftStr.length - 1
@@ -187,7 +211,7 @@ class ColumnarMultiply(left: Expression, right: Expression, original: Expression
           case _ =>
             left_node
         }
-        val newRightNode = right match {
+        val newRightNode = right_val match {
           case literal: ColumnarLiteral =>
             val rightStr = literal.value.asInstanceOf[Decimal].toDouble.toString
             val newRightPrecision = rightStr.length - 1
@@ -230,11 +254,33 @@ class ColumnarDivide(left: Expression, right: Expression,
     with ColumnarExpression
     with Logging {
 
+  val left_val: Any = left match {
+    case c: ColumnarCast =>
+      if (c.child.dataType.isInstanceOf[DecimalType] &&
+        c.dataType.isInstanceOf[DecimalType]) {
+        c.child
+      } else {
+        left
+      }
+    case _ =>
+      left
+  }
+  val right_val: Any = right match {
+    case c: ColumnarCast =>
+      if (c.child.dataType.isInstanceOf[DecimalType] &&
+        c.dataType.isInstanceOf[DecimalType]) {
+        c.child
+      } else {
+        right
+      }
+    case _ =>
+      right
+  }
   override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
     var (left_node, left_type): (TreeNode, ArrowType) =
-      left.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+      left_val.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
     var (right_node, right_type): (TreeNode, ArrowType) =
-      right.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+      right_val.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
 
     (left_type, right_type) match {
       case (l: ArrowType.Decimal, r: ArrowType.Decimal) =>
@@ -244,7 +290,7 @@ class ColumnarDivide(left: Expression, right: Expression,
           DecimalTypeUtil.getResultTypeForOperation(
             DecimalTypeUtil.OperationType.DIVIDE, l, r)
         }
-        val newLeftNode = left match {
+        val newLeftNode = left_val match {
           case literal: ColumnarLiteral =>
             val leftStr = literal.value.asInstanceOf[Decimal].toDouble.toString
             val newLeftPrecision = leftStr.length - 1
@@ -258,7 +304,7 @@ class ColumnarDivide(left: Expression, right: Expression,
           case _ =>
             left_node
         }
-        val newRightNode = right match {
+        val newRightNode = right_val match {
           case literal: ColumnarLiteral =>
             val rightStr = literal.value.asInstanceOf[Decimal].toDouble.toString
             val newRightPrecision = rightStr.length - 1
diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionedProbeJoin.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionedProbeJoin.scala
index 3b7c6c104..1eead6e8c 100644
--- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionedProbeJoin.scala
+++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarConditionedProbeJoin.scala
@@ -55,7 +55,7 @@ object ColumnarConditionedProbeJoin extends Logging {
   def prepareHashBuildFunction(
       buildKeys: Seq[Expression],
       buildInputAttributes: Seq[Attribute],
-      builder_type: Int = 0,
+      builder_type: Int = 1,
       is_broadcast: Boolean = false): TreeNode = {
     val buildInputFieldList: List[Field] = buildInputAttributes.toList.map(attr => {
       Field
diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarRoundOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarRoundOperator.scala
index 644e09863..ec7808a68 100644
--- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarRoundOperator.scala
+++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarRoundOperator.scala
@@ -42,7 +42,9 @@ class ColumnarRound(child: Expression, scale: Expression, original: Expression)
   buildCheck()
 
   def buildCheck(): Unit = {
-    if (child.dataType != DoubleType) {
+    val supportedTypes = List(FloatType, DoubleType, IntegerType, LongType)
+    if (supportedTypes.indexOf(child.dataType) == -1 &&
+        !child.dataType.isInstanceOf[DecimalType]) {
       throw new UnsupportedOperationException(
         s"${child.dataType} is not supported in ColumnarRound")
     }
@@ -54,7 +56,7 @@ class ColumnarRound(child: Expression, scale: Expression, original: Expression)
     val (scale_node, scaleType): (TreeNode, ArrowType) =
       scale.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
 
-    val resultType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+    val resultType = CodeGeneration.getResultType(dataType)
     val funcNode = TreeBuilder.makeFunction("round",
       Lists.newArrayList(child_node, scale_node), resultType)
     (funcNode, resultType)
diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarUnaryOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarUnaryOperator.scala
index 0c713c8e8..82981fbcb 100644
--- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarUnaryOperator.scala
+++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarUnaryOperator.scala
@@ -113,8 +113,8 @@ class ColumnarIsNull(child: Expression, original: Expression)
   }
 }
 
-class ColumnarYear(child: Expression, original: Expression)
-    extends Year(child: Expression)
+class ColumnarMonth(child: Expression, original: Expression)
+    extends Month(child: Expression)
     with ColumnarExpression
     with Logging {
 
@@ -124,7 +124,7 @@ class ColumnarYear(child: Expression, original: Expression)
     val supportedTypes = List(LongType, StringType, DateType)
     if (supportedTypes.indexOf(child.dataType) == -1) {
       throw new UnsupportedOperationException(
-        s"${child.dataType} is not supported in ColumnarYear.")
+        s"${child.dataType} is not supported in ColumnarMonth.")
     }
   }
 
@@ -139,6 +139,80 @@ class ColumnarYear(child: Expression, original: Expression)
         "castDATE",
         Lists.newArrayList(child_node),
         new ArrowType.Date(DateUnit.MILLISECOND))
+    val funcNode =
+      TreeBuilder.makeFunction(
+        "extractMonth",
+        Lists.newArrayList(cast_func),
+        new ArrowType.Int(64, true))
+    val castNode =
+      TreeBuilder.makeFunction("castINT", Lists.newArrayList(funcNode), resultType)
+    (castNode, resultType)
+  }
+}
+
+class ColumnarDayOfMonth(child: Expression, original: Expression)
+  extends DayOfMonth(child: Expression)
+    with ColumnarExpression
+    with Logging {
+  // Runs at construction time: rejects a child whose type is not Long, String or Date.
+  buildCheck()
+
+  def buildCheck(): Unit = {
+    val supportedTypes = List(LongType, StringType, DateType)
+    if (supportedTypes.indexOf(child.dataType) == -1) {
+      throw new UnsupportedOperationException(
+        s"${child.dataType} is not supported in ColumnarDayOfMonth.")
+    }
+  }
+
+  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
+    val (child_node, childType): (TreeNode, ArrowType) =
+      child.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+    // Function tree built below: castINT(extractDay(castDATE(child))), typed as resultType.
+    val resultType = new ArrowType.Int(32, true)
+    //FIXME(): requires utf8()/int64() as input
+    val cast_func =
+    TreeBuilder.makeFunction(
+      "castDATE",
+      Lists.newArrayList(child_node),
+      new ArrowType.Date(DateUnit.MILLISECOND))
+    val funcNode =
+      TreeBuilder.makeFunction(
+        "extractDay",
+        Lists.newArrayList(cast_func),
+        new ArrowType.Int(64, true))
+    val castNode =
+      TreeBuilder.makeFunction("castINT", Lists.newArrayList(funcNode), resultType)
+    (castNode, resultType)
+  }
+}
+
+class ColumnarYear(child: Expression, original: Expression)
+  extends Year(child: Expression)
+    with ColumnarExpression
+    with Logging {
+
+  buildCheck()
+
+  def buildCheck(): Unit = {
+    val supportedTypes = List(LongType, StringType, DateType)
+    if (supportedTypes.indexOf(child.dataType) == -1) {
+      throw new UnsupportedOperationException(
+        s"${child.dataType} is not supported in ColumnarYear.")
+    }
+  }
+
+  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
+    val (child_node, childType): (TreeNode, ArrowType) =
+      child.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)
+
+    val resultType = new ArrowType.Int(32, true)
+    //FIXME(): requires utf8()/int64() as input
+    val cast_func =
+    TreeBuilder.makeFunction(
+      "castDATE",
+      Lists.newArrayList(child_node),
+      new ArrowType.Date(DateUnit.MILLISECOND))
     val funcNode =
       TreeBuilder.makeFunction(
         "extractYear",
@@ -184,8 +258,9 @@ class ColumnarAbs(child: Expression, original: Expression)
   buildCheck()
 
   def buildCheck(): Unit = {
-    val supportedTypes = List(FloatType, DoubleType)
-    if (supportedTypes.indexOf(child.dataType) == -1) {
+    val supportedTypes = List(FloatType, DoubleType, IntegerType, LongType)
+    if (supportedTypes.indexOf(child.dataType) == -1 &&
+        !child.dataType.isInstanceOf[DecimalType]) {
       throw new UnsupportedOperationException(
         s"${child.dataType} is not supported in ColumnarAbs")
     }
@@ -557,6 +632,10 @@ object ColumnarUnaryOperator {
       new ColumnarIsNotNull(child, i)
     case y: Year =>
       new ColumnarYear(child, y)
+    case m: Month =>
+      new ColumnarMonth(child, m)
+    case d: DayOfMonth =>
+      new ColumnarDayOfMonth(child, d)
     case n: Not =>
       new ColumnarNot(child, n)
     case a: Abs =>
diff --git a/native-sql-engine/cpp/compile.sh b/native-sql-engine/cpp/compile.sh
index 8f89e491b..064f09741 100755
--- a/native-sql-engine/cpp/compile.sh
+++ b/native-sql-engine/cpp/compile.sh
@@ -2,6 +2,19 @@
 
 set -eu
 
+TESTS=${1:-OFF}
+BUILD_ARROW=${2:-ON}
+STATIC_ARROW=${3:-OFF}
+BUILD_PROTOBUF=${4:-ON}
+ARROW_ROOT=${5:-/usr/local}
+
+echo "CMAKE Arguments:"
+echo "TESTS=${TESTS}"
+echo "BUILD_ARROW=${BUILD_ARROW}"
+echo "STATIC_ARROW=${STATIC_ARROW}"
+echo "BUILD_PROTOBUF=${BUILD_PROTOBUF}"
+echo "ARROW_ROOT=${ARROW_ROOT}"
+
 CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 echo $CURRENT_DIR
 
@@ -11,7 +24,7 @@ if [ -d build ]; then
 fi
 mkdir build
 cd build
-cmake ..
+cmake .. -DTESTS=${TESTS} -DBUILD_ARROW=${BUILD_ARROW} -DSTATIC_ARROW=${STATIC_ARROW} -DBUILD_PROTOBUF=${BUILD_PROTOBUF} -DARROW_ROOT=${ARROW_ROOT}
 make
 
 set +eu
diff --git a/native-sql-engine/cpp/src/CMakeLists.txt b/native-sql-engine/cpp/src/CMakeLists.txt
index 236237e2e..28536c63f 100644
--- a/native-sql-engine/cpp/src/CMakeLists.txt
+++ b/native-sql-engine/cpp/src/CMakeLists.txt
@@ -13,6 +13,11 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 
 set(CMAKE_BUILD_TYPE  "Release")
 
+set(ARROW_ROOT "/usr/local" CACHE PATH "Arrow root dir")
+
+option(BUILD_ARROW "Build Arrow from Source" ON)
+option(STATIC_ARROW "Build Arrow with Static Libraries" OFF)
+option(BUILD_PROTOBUF "Build Protobuf from Source" ON)
 option(USE_AVX512 "Build with AVX-512 optimizations" OFF)
 option(TESTS "Build the tests" OFF)
 option(BENCHMARKS "Build the benchmarks" OFF)
@@ -25,22 +30,13 @@ INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS})
 find_package(JNI REQUIRED)
 set(source_root_directory ${CMAKE_CURRENT_SOURCE_DIR})
 
-# Gandiva protobuf
-
-set(CMAKE_FIND_LIBRARY_SUFFIXES ".so")
-find_package(Protobuf)
-
-if ("${Protobuf_LIBRARY}" STREQUAL "Protobuf_LIBRARY-NOTFOUND")
-  message(WARNING "libprotobuf.so not found, will build from source")
-  set(BUILD_PROTOBUF true)
-endif()
-
-if(BUILD_PROTOBUF)
-  message(STATUS "Building Protocol Buffers from source")
+# Building Protobuf
+macro(build_protobuf)
+  message(STATUS "Building Protocol Buffers from Source")
   set (PROTOBUF_SOURCE_URL
        "https://github.com/protocolbuffers/protobuf/releases/download/v3.7.1/protobuf-all-3.7.1.tar.gz"
         "https://github.com/ursa-labs/thirdparty/releases/download/latest/protobuf-v3.7.1.tar.gz"
-)
+  )
   set(PROTOBUF_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-install")
   set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include")
   set(
@@ -86,35 +82,17 @@ if(BUILD_PROTOBUF)
     PROPERTIES IMPORTED_LOCATION "${PROTOBUF_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES
                "${PROTOBUF_INCLUDE_DIR}")
   add_dependencies(protobuf::libprotobuf protobuf_ep)
-else()
-  set(PROTOC_BIN ${Protobuf_PROTOC_EXECUTABLE})
-endif()
-
-file(MAKE_DIRECTORY ${root_directory}/src/proto)
-set(PROTO_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/proto")
-set(PROTO_OUTPUT_FILES "${PROTO_OUTPUT_DIR}/Exprs.pb.cc")
-set(PROTO_OUTPUT_FILES ${PROTO_OUTPUT_FILES} "${PROTO_OUTPUT_DIR}/Exprs.pb.h")
-
-set_source_files_properties(${PROTO_OUTPUT_FILES} PROPERTIES GENERATED TRUE)
-
-get_filename_component(ABS_GANDIVA_PROTO ${CMAKE_CURRENT_SOURCE_DIR}/proto/Exprs.proto
-                       ABSOLUTE)
-
-add_custom_command(OUTPUT ${PROTO_OUTPUT_FILES}
-                   COMMAND ${PROTOC_BIN}
-                           --proto_path
-                           ${CMAKE_CURRENT_SOURCE_DIR}/proto
-                           --cpp_out
-                       ${PROTO_OUTPUT_DIR}
-                           ${CMAKE_CURRENT_SOURCE_DIR}/proto/Exprs.proto
-                   DEPENDS  ${ABS_GANDIVA_PROTO}
-                   COMMENT "Running PROTO compiler on Exprs.proto"
-                   VERBATIM)
+endmacro()
 
-add_custom_target(jni_proto ALL DEPENDS ${PROTO_OUTPUT_FILES})
-add_dependencies(jni_proto protobuf::libprotobuf)
-set(PROTO_SRCS "${PROTO_OUTPUT_DIR}/Exprs.pb.cc")
-set(PROTO_HDRS "${PROTO_OUTPUT_DIR}/Exprs.pb.h")
+macro(find_protobuf)
+  # Use an installed shared Protobuf; abort at configure time if the library or protoc is missing
+  set(CMAKE_FIND_LIBRARY_SUFFIXES ".so")
+  find_package(Protobuf)
+  if(NOT Protobuf_LIBRARY OR NOT Protobuf_PROTOC_EXECUTABLE)
+    message(FATAL_ERROR "Protobuf Library or protoc Not Found")
+  endif()
+  set(PROTOC_BIN ${Protobuf_PROTOC_EXECUTABLE})
+endmacro()
 
 if(DEBUG)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -DDEBUG -DDEBUG_LEVEL_1 -DDEBUG_LEVEL_2")
@@ -134,6 +112,214 @@ if(USE_AVX512)
   endif ()
 endif()
 
+# Build Arrow macro
+macro(build_arrow STATIC_ARROW)
+  set(ARROW_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep-install")
+  message(STATUS "ARROW_PREFIX: ${ARROW_PREFIX}")
+  set(ARROW_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/arrow_ep")
+  message(STATUS "ARROW_SOURCE_DIR: ${ARROW_SOURCE_DIR}")
+  set(ARROW_INCLUDE_DIR "${ARROW_PREFIX}/include")
+  set(BINARY_RELEASE_DIR "${root_directory}/releases")
+
+  # Load libarrow
+  set(ARROW_LIB_NAME arrow)
+
+  # Load libgandiva
+  set(GANDIVA_LIB_NAME gandiva)
+
+  ExternalProject_Add(arrow_ep
+                      GIT_REPOSITORY https://github.com/oap-project/arrow.git
+                      SOURCE_DIR ${ARROW_SOURCE_DIR}
+                      GIT_TAG arrow-3.0.0-oap-1.1
+                      BUILD_IN_SOURCE 1
+                      INSTALL_DIR ${ARROW_PREFIX}
+                      INSTALL_COMMAND make install
+                      SOURCE_SUBDIR cpp
+                      CMAKE_ARGS 
+                      -DARROW_BUILD_STATIC=OFF
+                      -DARROW_BUILD_SHARED=ON
+                      -DARROW_COMPUTE=ON
+                      -DARROW_GANDIVA_JAVA=ON
+                      -DARROW_GANDIVA=ON
+                      -DARROW_PARQUET=ON
+                      -DARROW_HDFS=ON
+                      -DARROW_BOOST_USE_SHARED=OFF
+                      -DARROW_JNI=ON
+                      -DARROW_DATASET=ON
+                      -DARROW_WITH_PROTOBUF=ON
+                      -DARROW_WITH_SNAPPY=ON
+                      -DARROW_WITH_LZ4=ON
+                      -DARROW_WITH_ZSTD=OFF
+                      -DARROW_WITH_BROTLI=OFF
+                      -DARROW_WITH_ZLIB=OFF
+                      -DARROW_WITH_FASTPFOR=ON
+                      -DARROW_FILESYSTEM=ON
+                      -DARROW_JSON=ON
+                      -DARROW_FLIGHT=OFF
+                      -DARROW_JEMALLOC=ON
+                      -DARROW_SIMD_LEVEL=AVX2
+                      -DARROW_RUNTIME_SIMD_LEVEL=MAX
+                      -DARROW_DEPENDENCY_SOURCE=BUNDLED
+                      -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}
+                      -DCMAKE_INSTALL_LIBDIR=lib)
+
+  ExternalProject_Add_Step(arrow_ep java_install
+                      COMMAND mvn clean install -P arrow-jni -am -Darrow.cpp.build.dir=${ARROW_PREFIX}/lib -DskipTests -Dcheckstyle.skip
+                      COMMENT "Arrow Java maven install after CPP make install"
+                      DEPENDEES mkdir download update patch configure build install
+                      WORKING_DIRECTORY "${ARROW_SOURCE_DIR}/java"
+  )
+  add_dependencies(arrow_ep jni_proto)
+
+  file(MAKE_DIRECTORY "${ARROW_PREFIX}/include")
+
+  if(STATIC_ARROW)
+    # Load Static Arrow Library
+    message(FATAL_ERROR "Not Support Static Arrow")
+
+    set(THREADS_PREFER_PTHREAD_FLAG ON)
+    find_package(Threads REQUIRED)
+
+    set(ARROW_LIB_NAME arrow_bundled_dependencies)
+    set(
+      ARROW_STATIC_LIB
+      "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${ARROW_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+      )
+    add_library(Arrow::arrow STATIC IMPORTED)
+    set_target_properties(Arrow::arrow
+                          PROPERTIES IMPORTED_LOCATION "${ARROW_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                      "${ARROW_PREFIX}/include")
+    add_dependencies(Arrow::arrow arrow_ep)
+
+    # Load Static Gandiva Library
+    set(
+      GANDIVA_STATIC_LIB
+      "${ARROW_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}"
+      )
+    add_library(Arrow::gandiva STATIC IMPORTED)
+    set_target_properties(Arrow::gandiva
+                          PROPERTIES IMPORTED_LOCATION "${GANDIVA_STATIC_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                      "${ARROW_PREFIX}/include")
+    add_dependencies(Arrow::gandiva arrow_ep)
+    target_link_libraries(spark_columnar_jni PRIVATE Arrow::arrow Arrow::gandiva Threads::Threads)
+
+  else()
+
+    # Stage libarrow.so.300 in releases/ for the package jar; ln -sf replaces a link left by an earlier build
+    ExternalProject_Add_Step(arrow_ep copy_arrow_binary
+                      COMMAND cp ${ARROW_PREFIX}/lib/libarrow.so.300 ${root_directory}/releases/
+                      COMMENT "Copy libarrow.so.300 to releases/"
+                      DEPENDEES mkdir download update patch configure build install java_install
+                      WORKING_DIRECTORY "${ARROW_PREFIX}/"
+    )
+
+    ExternalProject_Add_Step(arrow_ep create_arrow_link
+                      COMMAND ln -sf ${root_directory}/releases/libarrow.so.300 ${root_directory}/releases/libarrow.so
+                      COMMENT "Create libarrow.so soft link to releases/"
+                      DEPENDEES mkdir download update patch configure build install java_install copy_arrow_binary
+                      WORKING_DIRECTORY "${ARROW_PREFIX}/"
+    )
+
+    # Stage libgandiva.so.300 in releases/ for the package jar; ln -sf replaces a link left by an earlier build
+    ExternalProject_Add_Step(arrow_ep copy_gandiva_binary
+                      COMMAND cp ${ARROW_PREFIX}/lib/libgandiva.so.300 ${root_directory}/releases/
+                      COMMENT "Copy libgandiva.so.300 to releases/"
+                      DEPENDEES mkdir download update patch configure build install java_install
+                      WORKING_DIRECTORY "${ARROW_PREFIX}/"
+    )
+
+    ExternalProject_Add_Step(arrow_ep create_gandiva_link
+                      COMMAND ln -sf ${root_directory}/releases/libgandiva.so.300 ${root_directory}/releases/libgandiva.so
+                      COMMENT "Create libgandiva.so soft link to releases/"
+                      DEPENDEES mkdir download update patch configure build install java_install copy_gandiva_binary
+                      WORKING_DIRECTORY "${ARROW_PREFIX}/"
+    )
+
+    # Copy Arrow Headers to releases/include
+    ExternalProject_Add_Step(arrow_ep copy_arrow_header
+                      COMMAND cp -rf ${ARROW_PREFIX}/include/ ${root_directory}/releases/
+                      COMMENT "Arrow Header to releases/include"
+                      DEPENDEES mkdir download update patch configure build install java_install
+                      WORKING_DIRECTORY "${ARROW_PREFIX}/"
+    )
+
+    # Set up Arrow Shared Library Directory
+    set(
+      ARROW_SHARED_LIB
+      "${root_directory}/releases/${CMAKE_SHARED_LIBRARY_PREFIX}${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}"
+      )
+    add_library(Arrow::arrow SHARED IMPORTED)
+    set_target_properties(Arrow::arrow
+                          PROPERTIES IMPORTED_LOCATION "${ARROW_SHARED_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                      "${root_directory}/releases/include")
+    add_dependencies(Arrow::arrow arrow_ep)
+
+    # Set up Gandiva Shared Library Directory
+    set(
+      GANDIVA_SHARED_LIB
+      "${root_directory}/releases/${CMAKE_SHARED_LIBRARY_PREFIX}${GANDIVA_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}"
+      )
+    add_library(Arrow::gandiva SHARED IMPORTED)
+    set_target_properties(Arrow::gandiva
+                          PROPERTIES IMPORTED_LOCATION "${GANDIVA_SHARED_LIB}"
+                                     INTERFACE_INCLUDE_DIRECTORIES
+                                      "${root_directory}/releases/include")
+    add_dependencies(Arrow::gandiva arrow_ep)
+
+    target_link_libraries(spark_columnar_jni
+                      LINK_PUBLIC Arrow::arrow Arrow::gandiva)
+  endif()
+endmacro()
+
+# Find the existing Arrow library by using ARROW_ROOT path
+macro(find_arrow)
+  set(ARROW_LIB_DIR "${ARROW_ROOT}/lib")
+  set(ARROW_LIB64_DIR "${ARROW_ROOT}/lib64")
+  message(STATUS "Set Arrow Library Directory in ${ARROW_LIB_DIR} or ${ARROW_LIB64_DIR}")
+  set(ARROW_INCLUDE_DIR "${ARROW_ROOT}/include")
+  message(STATUS "Set Arrow Include Directory in ${ARROW_INCLUDE_DIR}")
+
+  find_library(ARROW_LIB NAMES libarrow.so.300 PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH)
+  if(NOT ARROW_LIB)
+    message(FATAL_ERROR "Arrow Library Not Found")
+  else()
+    message(STATUS "Arrow Library Can Be Found in ${ARROW_LIB}")
+  endif()
+
+  find_library(GANDIVA_LIB NAMES libgandiva.so.300 PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} NO_DEFAULT_PATH)
+  if(NOT GANDIVA_LIB)
+    message(FATAL_ERROR "Gandiva Library Not Found")
+  else()
+    message(STATUS "Gandiva Library Can Be Found in ${GANDIVA_LIB}")
+  endif()
+
+  file(COPY ${ARROW_LIB}.0.0 DESTINATION ${root_directory}/releases/)
+  file(COPY ${ARROW_LIB} DESTINATION ${root_directory}/releases/)
+  file(COPY ${GANDIVA_LIB}.0.0 DESTINATION ${root_directory}/releases/)
+  file(COPY ${GANDIVA_LIB} DESTINATION ${root_directory}/releases/)
+
+  target_link_libraries(spark_columnar_jni
+                      LINK_PUBLIC ${ARROW_LIB} ${GANDIVA_LIB})
+  target_include_directories(spark_columnar_jni PUBLIC ${ARROW_INCLUDE_DIR})
+endmacro()
+
+# Set up Proto
+file(MAKE_DIRECTORY ${root_directory}/src/proto)
+set(PROTO_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/proto")
+set(PROTO_OUTPUT_FILES "${PROTO_OUTPUT_DIR}/Exprs.pb.cc")
+set(PROTO_OUTPUT_FILES ${PROTO_OUTPUT_FILES} "${PROTO_OUTPUT_DIR}/Exprs.pb.h")
+
+set_source_files_properties(${PROTO_OUTPUT_FILES} PROPERTIES GENERATED TRUE)
+
+get_filename_component(ABS_GANDIVA_PROTO ${CMAKE_CURRENT_SOURCE_DIR}/proto/Exprs.proto
+                       ABSOLUTE)
+
+set(PROTO_SRCS "${PROTO_OUTPUT_DIR}/Exprs.pb.cc")
+set(PROTO_HDRS "${PROTO_OUTPUT_DIR}/Exprs.pb.h")
+
 if(TESTS)
   find_package(GTest)
 macro(package_add_test TESTNAME)
@@ -171,17 +357,6 @@ endmacro()
   add_subdirectory(benchmarks)
 endif()
 
-find_library(ARROW_LIB arrow)
-find_library(GANDIVA_LIB gandiva)
-
-if(NOT ARROW_LIB)
-    message(FATAL_ERROR "Arrow library not found")
-endif()
-
-if(NOT GANDIVA_LIB)
-    message(FATAL_ERROR "Gandiva library not found")
-endif()
-
 set(CODEGEN_HEADERS
     third_party/
     )
@@ -255,18 +430,44 @@ add_library(spark_columnar_jni SHARED ${SPARK_COLUMNAR_PLUGIN_SRCS} ${THIRDPARTY
 add_dependencies(spark_columnar_jni jni_proto)
 
 if(BUILD_PROTOBUF)
-target_link_libraries(spark_columnar_jni
-                      LINK_PUBLIC ${ARROW_LIB} ${PARQUET_LIB} ${GANDIVA_LIB}
-                      LINK_PRIVATE protobuf::libprotobuf)
+  build_protobuf()
+  message(STATUS "Building ProtoBuf from Source: ${BUILD_PROTOBUF}")
+  target_link_libraries(spark_columnar_jni
+                        LINK_PRIVATE protobuf::libprotobuf)
 else()
-target_link_libraries(spark_columnar_jni
-                      LINK_PUBLIC ${ARROW_LIB} ${PARQUET_LIB} ${GANDIVA_LIB}  ${PROTOBUF_LIBRARY})
+  find_protobuf()
+  message(STATUS "Use existing ProtoBuf libraries: ${PROTOBUF_LIBRARY}")
+  target_link_libraries(spark_columnar_jni
+                        LINK_PUBLIC ${PROTOBUF_LIBRARY})
 endif()
+
+add_custom_command(OUTPUT ${PROTO_OUTPUT_FILES}
+                   COMMAND ${PROTOC_BIN}
+                           --proto_path
+                           ${CMAKE_CURRENT_SOURCE_DIR}/proto
+                           --cpp_out
+                           ${PROTO_OUTPUT_DIR}
+                           ${CMAKE_CURRENT_SOURCE_DIR}/proto/Exprs.proto
+                   DEPENDS  ${ABS_GANDIVA_PROTO}
+                   COMMENT "Running PROTO compiler on Exprs.proto"
+                   VERBATIM)
+add_custom_target(jni_proto ALL DEPENDS ${PROTO_OUTPUT_FILES})
+add_dependencies(jni_proto protobuf::libprotobuf)
 target_include_directories(spark_columnar_jni PUBLIC ${CMAKE_SYSTEM_INCLUDE_PATH} ${JNI_INCLUDE_DIRS} ${source_root_directory} ${PROTO_OUTPUT_DIR} ${PROTOBUF_INCLUDE})
 set_target_properties(spark_columnar_jni PROPERTIES
                       LIBRARY_OUTPUT_DIRECTORY ${root_directory}/releases
 )
 
+# Build Arrow
+message(STATUS "Building ARROW from Source: ${BUILD_ARROW}")
+if(BUILD_ARROW)
+  build_arrow(${STATIC_ARROW})
+  message(STATUS "Building Static ARROW: ${STATIC_ARROW}")
+else() #
+  find_arrow()
+  message(STATUS "Use existing ARROW libraries")
+endif()
+
 if(DEFINED ENV{HADOOP_HOME})
   set(LIBHDFS3_DESTINATION $ENV{HADOOP_HOME}/lib/native)
 else()
diff --git a/native-sql-engine/cpp/src/benchmarks/shuffle_split_benchmark.cc b/native-sql-engine/cpp/src/benchmarks/shuffle_split_benchmark.cc
index e40778ba8..8c6e15668 100644
--- a/native-sql-engine/cpp/src/benchmarks/shuffle_split_benchmark.cc
+++ b/native-sql-engine/cpp/src/benchmarks/shuffle_split_benchmark.cc
@@ -26,7 +26,9 @@
 #include <parquet/arrow/reader.h>
 #include <parquet/file_reader.h>
 #include <shuffle/splitter.h>
+
 #include <chrono>
+
 #include "codegen/code_generator.h"
 #include "codegen/code_generator_factory.h"
 #include "tests/test_utils.h"
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h b/native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h
index 0710f0617..2015499c2 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/code_generator.h
@@ -35,8 +35,7 @@ namespace arrowcompute {
 class ArrowComputeCodeGenerator : public CodeGenerator {
  public:
   ArrowComputeCodeGenerator(
-      arrow::MemoryPool* memory_pool,
-      std::shared_ptr<arrow::Schema> schema_ptr,
+      arrow::MemoryPool* memory_pool, std::shared_ptr<arrow::Schema> schema_ptr,
       std::vector<std::shared_ptr<gandiva::Expression>> expr_vector,
       std::vector<std::shared_ptr<arrow::Field>> ret_types, bool return_when_finish,
       std::vector<std::shared_ptr<::gandiva::Expression>> finish_exprs_vector)
@@ -50,13 +49,13 @@ class ArrowComputeCodeGenerator : public CodeGenerator {
     for (auto expr : expr_vector) {
       std::shared_ptr<ExprVisitor> root_visitor;
       if (finish_exprs_vector.empty()) {
-        auto visitor = MakeExprVisitor(memory_pool, schema_ptr, expr, ret_types_, &expr_visitor_cache_,
-                                       &root_visitor);
+        auto visitor = MakeExprVisitor(memory_pool, schema_ptr, expr, ret_types_,
+                                       &expr_visitor_cache_, &root_visitor);
         auto status = DistinctInsert(root_visitor, &visitor_list_);
       } else {
-        auto visitor =
-            MakeExprVisitor(memory_pool, schema_ptr, expr, ret_types_, finish_exprs_vector[i++],
-                            &expr_visitor_cache_, &root_visitor);
+        auto visitor = MakeExprVisitor(memory_pool, schema_ptr, expr, ret_types_,
+                                       finish_exprs_vector[i++], &expr_visitor_cache_,
+                                       &root_visitor);
         auto status = DistinctInsert(root_visitor, &visitor_list_);
       }
     }
@@ -309,7 +308,8 @@ class ArrowComputeCodeGenerator : public CodeGenerator {
     for (auto column : batch) {
       if (length != 0 && length != column->length()) {
         return arrow::Status::Invalid(
-            "ArrowCompute MakeBatchFromBatch found batch contains columns with different "
+            "ArrowCompute MakeBatchFromBatch found batch contains columns with "
+            "different "
             "lengths, expect ",
             length, " while got ", column->length(), " from ", i, "th column.");
       }
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.cc
index 7935f2b1e..bbc9a7cb3 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.cc
@@ -101,7 +101,8 @@ arrow::Status BuilderVisitor::Visit(const gandiva::FunctionNode& node) {
         case BuilderVisitorNodeType::FunctionNode: {
           if (dependency) {
             return arrow::Status::Invalid(
-                "BuilderVisitor build ExprVisitor failed, got two depency while only "
+                "BuilderVisitor build ExprVisitor failed, got two depency "
+                "while only "
                 "support one.");
           }
           RETURN_NOT_OK(child_visitor->GetResult(&dependency));
@@ -117,8 +118,8 @@ arrow::Status BuilderVisitor::Visit(const gandiva::FunctionNode& node) {
       }
     }
 
-    // Add a new type of Function "Action", which will not create a new expr_visitor,
-    // instead, it will register itself to its dependency
+    // Add a new type of Function "Action", which will not create a new
+    // expr_visitor, instead, it will register itself to its dependency
     if (func_name.compare(0, 7, "action_") == 0) {
       if (dependency) {
         RETURN_NOT_OK(dependency->AppendAction(func_name, param_names));
@@ -130,7 +131,8 @@ arrow::Status BuilderVisitor::Visit(const gandiva::FunctionNode& node) {
         return arrow::Status::OK();
       } else {
         return arrow::Status::Invalid(
-            "BuilderVisitor is processing an action without dependency, this is "
+            "BuilderVisitor is processing an action without dependency, this "
+            "is "
             "invalid.");
       }
     }
@@ -671,7 +673,8 @@ arrow::Status ExprVisitor::MakeResultIterator(std::shared_ptr<arrow::Schema> sch
     RETURN_NOT_OK(impl_->MakeResultIterator(schema, out));
   } else {
     return arrow::Status::NotImplemented(
-        "FinishVsitor MakeResultIterator is not tested, so mark as not implemented "
+        "FinishVsitor MakeResultIterator is not tested, so mark as not "
+        "implemented "
         "here, "
         "codes are commented.");
   }
@@ -697,7 +700,8 @@ arrow::Status ExprVisitor::GetResult(
     std::vector<std::shared_ptr<arrow::Field>>* out_fields) {
   if (result_batch_list_.empty()) {
     return arrow::Status::Invalid(
-        "ArrowComputeExprVisitor::GetResult result_batch_list was not generated ",
+        "ArrowComputeExprVisitor::GetResult result_batch_list was not "
+        "generated ",
         func_name_);
   }
   *out = result_batch_list_;
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.h b/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.h
index 0789ffb0b..99b7b91f3 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor.h
@@ -113,16 +113,15 @@ class ExprVisitor : public std::enable_shared_from_this<ExprVisitor> {
                                   const gandiva::FunctionNode& node,
                                   std::shared_ptr<ExprVisitor>* out);
 
-  ExprVisitor(arrow::compute::ExecContext ctx,
-              std::shared_ptr<arrow::Schema> schema_ptr, std::string func_name,
-              std::vector<std::string> param_field_names,
+  ExprVisitor(arrow::compute::ExecContext ctx, std::shared_ptr<arrow::Schema> schema_ptr,
+              std::string func_name, std::vector<std::string> param_field_names,
               std::shared_ptr<ExprVisitor> dependency,
               std::shared_ptr<gandiva::Node> finish_func);
 
   ExprVisitor(arrow::compute::ExecContext ctx, std::string func_name);
 
-  ExprVisitor(arrow::compute::ExecContext ctx,
-              std::shared_ptr<arrow::Schema> schema_ptr, std::string func_name);
+  ExprVisitor(arrow::compute::ExecContext ctx, std::shared_ptr<arrow::Schema> schema_ptr,
+              std::string func_name);
 
   ~ExprVisitor() {
 #ifdef DEBUG
@@ -147,8 +146,7 @@ class ExprVisitor : public std::enable_shared_from_this<ExprVisitor> {
       std::shared_ptr<gandiva::FunctionNode> partition_spec,
       std::shared_ptr<gandiva::FunctionNode> order_spec,
       std::shared_ptr<gandiva::FunctionNode> frame_spec,
-      std::vector<std::shared_ptr<arrow::Field>> ret_fields,
-      ExprVisitor* p);
+      std::vector<std::shared_ptr<arrow::Field>> ret_fields, ExprVisitor* p);
   arrow::Status AppendAction(const std::string& func_name,
                              std::vector<std::string> param_name);
   arrow::Status Init();
@@ -209,8 +207,8 @@ class ExprVisitor : public std::enable_shared_from_this<ExprVisitor> {
   std::vector<int> in_batch_size_array_;
   ArrayList in_batch_;
   std::shared_ptr<arrow::Array> in_array_;
-  // group_indices is used to tell item in array_list_ and batch_list_ belong to which
-  // group
+  // group_indices is used to tell item in array_list_ and batch_list_ belong to
+  // which group
   std::vector<int> group_indices_;
 
   // Output data types.
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor_impl.h b/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor_impl.h
index 116a97a86..b0aac1773 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor_impl.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/expr_visitor_impl.h
@@ -205,14 +205,17 @@ class WindowVisitorImpl : public ExprVisitorImpl {
         for (auto col_id : partition_field_ids_) {
           if (col_id >= p_->in_record_batch_->num_columns()) {
             return arrow::Status::Invalid(
-                "WindowVisitorImpl: Partition field number overflows defined column "
+                "WindowVisitorImpl: Partition field number overflows defined "
+                "column "
                 "count");
           }
           auto col = p_->in_record_batch_->column(col_id);
           in1.push_back(col);
         }
 #ifdef DEBUG
-        std::cout << "[window kernel] Calling concat_kernel_->Evaluate(in1, &out1) on batch... " << std::endl;
+        std::cout << "[window kernel] Calling concat_kernel_->Evaluate(in1, "
+                     "&out1) on batch... "
+                  << std::endl;
 #endif
         RETURN_NOT_OK(concat_kernel_->Evaluate(in1, &out1));
 #ifdef DEBUG
@@ -222,7 +225,9 @@ class WindowVisitorImpl : public ExprVisitorImpl {
 
       std::shared_ptr<arrow::Array> in2 = out1;
 #ifdef DEBUG
-      std::cout << "[window kernel] Calling partition_kernel_->Evaluate(in2, &out2) on batch... " << std::endl;
+      std::cout << "[window kernel] Calling partition_kernel_->Evaluate(in2, "
+                   "&out2) on batch... "
+                << std::endl;
 #endif
       RETURN_NOT_OK(partition_kernel_->Evaluate(in2, &out2));
 #ifdef DEBUG
@@ -235,7 +240,8 @@ class WindowVisitorImpl : public ExprVisitorImpl {
       for (auto col_id : function_param_field_ids_.at(func_id)) {
         if (col_id >= p_->in_record_batch_->num_columns()) {
           return arrow::Status::Invalid(
-              "WindowVisitorImpl: Function parameter number overflows defined column "
+              "WindowVisitorImpl: Function parameter number overflows defined "
+              "column "
               "count");
         }
         auto col = p_->in_record_batch_->column(col_id);
@@ -243,7 +249,9 @@ class WindowVisitorImpl : public ExprVisitorImpl {
       }
       in3.push_back(out2);
 #ifdef DEBUG
-      std::cout << "[window kernel] Calling function_kernels_.at(func_id)->Evaluate(in3) on batch... " << std::endl;
+      std::cout << "[window kernel] Calling "
+                   "function_kernels_.at(func_id)->Evaluate(in3) on batch... "
+                << std::endl;
 #endif
       RETURN_NOT_OK(function_kernels_.at(func_id)->Evaluate(in3));
 #ifdef DEBUG
@@ -283,7 +291,8 @@ class WindowVisitorImpl : public ExprVisitorImpl {
           length = arr->length();
         } else if (length != arr->length()) {
           return arrow::Status::Invalid(
-              "WindowVisitorImpl: Return array length in the same batch are not the same "
+              "WindowVisitorImpl: Return array length in the same batch are "
+              "not the same "
               "for "
               "different window functions");
         }
@@ -291,7 +300,8 @@ class WindowVisitorImpl : public ExprVisitorImpl {
       }
       if (length == -1) {
         return arrow::Status::Invalid(
-            "WindowVisitorImpl: No valid batch length returned for window functions");
+            "WindowVisitorImpl: No valid batch length returned for window "
+            "functions");
       }
       out.push_back(temp);
       out_sizes.push_back(length);
@@ -392,7 +402,8 @@ class EncodeVisitorImpl : public ExprVisitorImpl {
   int hash_table_type_;
 };
 
-////////////////////////// SortArraysToIndicesVisitorImpl ///////////////////////
+////////////////////////// SortArraysToIndicesVisitorImpl
+//////////////////////////
 class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
  public:
   SortArraysToIndicesVisitorImpl(std::vector<std::shared_ptr<arrow::Field>> field_list,
@@ -481,7 +492,8 @@ class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
       } break;
       default:
         return arrow::Status::NotImplemented(
-            "SortArraysToIndicesVisitorImpl: Does not support this type of input.");
+            "SortArraysToIndicesVisitorImpl: Does not support this type of "
+            "input.");
     }
     return arrow::Status::OK();
   }
@@ -500,7 +512,8 @@ class SortArraysToIndicesVisitorImpl : public ExprVisitorImpl {
       } break;
       default:
         return arrow::Status::Invalid(
-            "SortArraysToIndicesVisitorImpl MakeResultIterator does not support "
+            "SortArraysToIndicesVisitorImpl MakeResultIterator does not "
+            "support "
             "dependency type other than Batch.");
     }
     return arrow::Status::OK();
@@ -610,7 +623,8 @@ class CachedRelationVisitorImpl : public ExprVisitorImpl {
   std::shared_ptr<arrow::Schema> result_schema_;
 };
 
-////////////////////////// ConditionedProbeArraysVisitorImpl ///////////////////////
+////////////////////////// ConditionedProbeArraysVisitorImpl
+//////////////////////////
 class ConditionedProbeArraysVisitorImpl : public ExprVisitorImpl {
  public:
   ConditionedProbeArraysVisitorImpl(std::vector<std::shared_ptr<arrow::Field>> field_list,
@@ -704,7 +718,8 @@ class ConditionedProbeArraysVisitorImpl : public ExprVisitorImpl {
       } break;
       default:
         return arrow::Status::Invalid(
-            "ConditionedProbeArraysVisitorImpl MakeResultIterator does not support "
+            "ConditionedProbeArraysVisitorImpl MakeResultIterator does not "
+            "support "
             "dependency type other than Batch.");
     }
     return arrow::Status::OK();
@@ -724,7 +739,8 @@ class ConditionedProbeArraysVisitorImpl : public ExprVisitorImpl {
   gandiva::NodeVector hash_configuration_list_;
 };
 
-////////////////////////// ConditionedJoinArraysVisitorImpl ///////////////////////
+////////////////////////// ConditionedJoinArraysVisitorImpl
+//////////////////////////
 class ConditionedJoinArraysVisitorImpl : public ExprVisitorImpl {
  public:
   ConditionedJoinArraysVisitorImpl(
@@ -798,7 +814,8 @@ class ConditionedJoinArraysVisitorImpl : public ExprVisitorImpl {
       } break;
       default:
         return arrow::Status::Invalid(
-            "ConditionedJoinArraysVisitorImpl MakeResultIterator does not support "
+            "ConditionedJoinArraysVisitorImpl MakeResultIterator does not "
+            "support "
             "dependency type other than Batch.");
     }
     return arrow::Status::OK();
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/action_codegen.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/action_codegen.h
index b4d5c35ab..4c4d830e3 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/action_codegen.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/action_codegen.h
@@ -280,10 +280,12 @@ class GroupByActionCodeGen : public ActionCodeGen {
         validity_name = "action_groupby_" + name + "_validity_";
         GetTypedArrayCastString(data_type, input_list[0]);
         input_expr_list_.push_back(gandiva::TreeExprBuilder::MakeField(
-            input_fields_list[0]));  // this line is used to gen hash for multiple keys
+            input_fields_list[0]));  // this line is used to gen hash for
+                                     // multiple keys
       }
-      typed_input_and_prepare_list_.push_back(std::make_pair(
-          "", ""));  // when there is two name in sig list, we need to make others aligned
+      typed_input_and_prepare_list_.push_back(
+          std::make_pair("", ""));  // when there is two name in sig list, we
+                                    // need to make others aligned
 
       if (keep == false) {
         return;
@@ -411,8 +413,9 @@ class SumActionCodeGen : public ActionCodeGen {
         validity_name = "action_sum_" + name + "_validity_";
         GetTypedArrayCastString(data_type, input_list[0]);
       }
-      typed_input_and_prepare_list_.push_back(std::make_pair(
-          "", ""));  // when there is two name in sig list, we need to make others aligned
+      typed_input_and_prepare_list_.push_back(
+          std::make_pair("", ""));  // when there is two name in sig list, we
+                                    // need to make others aligned
       func_sig_list_.push_back(sig_name);
       func_sig_list_.push_back(validity_name);
       auto tmp_name = typed_input_and_prepare_list_[0].first + "_tmp";
@@ -1769,12 +1772,12 @@ class StddevSampFinalActionCodeGen : public ActionCodeGen {
     on_new_codes_list_.push_back("");
     on_finish_codes_list_.push_back(
         "if (" + count_name + "[i] - 1 < 0.00001) {\n" + validity_name +
-        ".push_back(true);\n" + 
-        sig_name + ".push_back(std::numeric_limits<double>::quiet_NaN());}\n" +
-        "else if (" + count_name + "[i] < 0.00001) {\n" + validity_name +
-        ".push_back(false);\n" + sig_name + ".push_back(0);}\n" + "else {\n" +
-        validity_name + ".push_back(true);\n" + sig_name + ".push_back(" + "sqrt(" +
-        m2_name + "[i] / (" + count_name + "[i] - 1)));}\n");
+        ".push_back(true);\n" + sig_name +
+        ".push_back(std::numeric_limits<double>::quiet_NaN());}\n" + "else if (" +
+        count_name + "[i] < 0.00001) {\n" + validity_name + ".push_back(false);\n" +
+        sig_name + ".push_back(0);}\n" + "else {\n" + validity_name +
+        ".push_back(true);\n" + sig_name + ".push_back(" + "sqrt(" + m2_name + "[i] / (" +
+        count_name + "[i] - 1)));}\n");
     on_finish_codes_list_.push_back("");
 
     finish_variable_list_.push_back(sig_name);
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc
index e232f2b84..55d5c65c7 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.cc
@@ -3943,10 +3943,9 @@ arrow::Status MakeStddevSampFinalAction(
     /*case arrow::Decimal128Type::type_id: {
       auto action_ptr = std::make_shared<
           StddevSampFinalAction<arrow::Decimal128Type, arrow::Decimal128,
-                                arrow::Decimal128Type, arrow::Decimal128>>(ctx, type,
-                                                                           type);
-      *out = std::dynamic_pointer_cast<ActionBase>(action_ptr);
-    } break;*/
+                                arrow::Decimal128Type, arrow::Decimal128>>(ctx,
+    type, type); *out = std::dynamic_pointer_cast<ActionBase>(action_ptr); }
+    break;*/
 #undef PROCESS
     default:
       break;
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.h
index 7f2c84f4d..eb2bfa664 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/actions_impl.h
@@ -15,6 +15,7 @@
  * limitations under the License.
  */
 
+#pragma once
 #include <arrow/builder.h>
 #include <arrow/compute/api.h>
 #include <arrow/status.h>
@@ -128,4 +129,4 @@ arrow::Status MakeStddevSampFinalAction(
 }  // namespace extra
 }  // namespace arrowcompute
 }  // namespace codegen
-}  // namespace sparkcolumnarplugin
\ No newline at end of file
+}  // namespace sparkcolumnarplugin
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/array_appender.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/array_appender.h
index 763694aea..44ce32dc0 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/array_appender.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/array_appender.h
@@ -68,6 +68,10 @@ class AppenderBase {
     return arrow::Status::NotImplemented("AppenderBase Finish is abstract.");
   }
 
+  virtual arrow::Status Reserve(uint64_t) {  // base default: reservation unsupported
+    return arrow::Status::NotImplemented("AppenderBase Reserve is abstract.");
+  }
+
   virtual arrow::Status Reset() {
     return arrow::Status::NotImplemented("AppenderBase Reset is abstract.");
   }
@@ -82,7 +86,7 @@ class ArrayAppender {};
 
 template <typename T>
 using is_number_or_date = std::integral_constant<bool, arrow::is_number_type<T>::value ||
-                                                       arrow::is_date_type<T>::value>;
+                                                           arrow::is_date_type<T>::value>;
 
 template <typename DataType, typename R = void>
 using enable_if_number_or_date = std::enable_if_t<is_number_or_date<DataType>::value, R>;
@@ -114,7 +118,7 @@ class ArrayAppender<DataType, enable_if_number_or_date<DataType>> : public Appen
   }
 
   arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id) override {
-    if (has_null_ && cached_arr_[array_id]->null_count() > 0 && 
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
         cached_arr_[array_id]->IsNull(item_id)) {
       RETURN_NOT_OK(builder_->AppendNull());
     } else {
@@ -126,7 +130,7 @@ class ArrayAppender<DataType, enable_if_number_or_date<DataType>> : public Appen
   arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id,
                        int repeated) override {
     if (repeated == 0) return arrow::Status::OK();
-    if (has_null_ && cached_arr_[array_id]->null_count() > 0 && 
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
         cached_arr_[array_id]->IsNull(item_id)) {
       RETURN_NOT_OK(builder_->AppendNulls(repeated));
     } else {
@@ -140,7 +144,7 @@ class ArrayAppender<DataType, enable_if_number_or_date<DataType>> : public Appen
 
   arrow::Status Append(const std::vector<ArrayItemIndex>& index_list) {
     for (auto tmp : index_list) {
-      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 && 
+      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 &&
           cached_arr_[tmp.array_id]->IsNull(tmp.id)) {
         RETURN_NOT_OK(builder_->AppendNull());
       } else {
@@ -201,7 +205,7 @@ class ArrayAppender<DataType, arrow::enable_if_string_like<DataType>>
   }
 
   arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id) override {
-    if (has_null_ && cached_arr_[array_id]->null_count() > 0 && 
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
         cached_arr_[array_id]->IsNull(item_id)) {
       RETURN_NOT_OK(builder_->AppendNull());
     } else {
@@ -213,7 +217,7 @@ class ArrayAppender<DataType, arrow::enable_if_string_like<DataType>>
   arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id,
                        int repeated) override {
     if (repeated == 0) return arrow::Status::OK();
-    if (has_null_ && cached_arr_[array_id]->null_count() > 0 && 
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
         cached_arr_[array_id]->IsNull(item_id)) {
       RETURN_NOT_OK(builder_->AppendNulls(repeated));
     } else {
@@ -227,7 +231,7 @@ class ArrayAppender<DataType, arrow::enable_if_string_like<DataType>>
 
   arrow::Status Append(const std::vector<ArrayItemIndex>& index_list) {
     for (auto tmp : index_list) {
-      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 && 
+      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 &&
           cached_arr_[tmp.array_id]->IsNull(tmp.id)) {
         RETURN_NOT_OK(builder_->AppendNull());
       } else {
@@ -286,7 +290,7 @@ class ArrayAppender<DataType, arrow::enable_if_boolean<DataType>> : public Appen
   }
 
   arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id) override {
-    if (has_null_ && cached_arr_[array_id]->null_count() > 0 && 
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
         cached_arr_[array_id]->IsNull(item_id)) {
       RETURN_NOT_OK(builder_->AppendNull());
     } else {
@@ -298,7 +302,7 @@ class ArrayAppender<DataType, arrow::enable_if_boolean<DataType>> : public Appen
   arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id,
                        int repeated) override {
     if (repeated == 0) return arrow::Status::OK();
-    if (has_null_ && cached_arr_[array_id]->null_count() > 0 && 
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
         cached_arr_[array_id]->IsNull(item_id)) {
       RETURN_NOT_OK(builder_->AppendNulls(repeated));
     } else {
@@ -312,7 +316,7 @@ class ArrayAppender<DataType, arrow::enable_if_boolean<DataType>> : public Appen
 
   arrow::Status Append(const std::vector<ArrayItemIndex>& index_list) {
     for (auto tmp : index_list) {
-      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 && 
+      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 &&
           cached_arr_[tmp.array_id]->IsNull(tmp.id)) {
         RETURN_NOT_OK(builder_->AppendNull());
       } else {
@@ -469,6 +473,422 @@ static arrow::Status MakeAppender(arrow::compute::ExecContext* ctx,
 }
 #undef PROCESS_SUPPORTED_TYPES
 
+/// unsafe appender ////
+template <typename DataType, typename Enable = void>
+class UnsafeArrayAppender {};
+
+template <typename DataType>
+class UnsafeArrayAppender<DataType, enable_if_number_or_date<DataType>>
+    : public AppenderBase {
+ public:
+  UnsafeArrayAppender(arrow::compute::ExecContext* ctx, AppenderType type = left)
+      : ctx_(ctx), type_(type) {
+    std::unique_ptr<arrow::ArrayBuilder> array_builder;
+    arrow::MakeBuilder(ctx_->memory_pool(), arrow::TypeTraits<DataType>::type_singleton(),
+                       &array_builder);
+    builder_.reset(arrow::internal::checked_cast<BuilderType_*>(array_builder.release()));
+  }
+  ~UnsafeArrayAppender() {}
+
+  AppenderType GetType() override { return type_; }
+  arrow::Status AddArray(const std::shared_ptr<arrow::Array>& arr) override {
+    auto typed_arr_ = std::dynamic_pointer_cast<ArrayType_>(arr);
+    cached_arr_.emplace_back(typed_arr_);
+    if (typed_arr_->null_count() > 0) has_null_ = true;
+    return arrow::Status::OK();
+  }
+
+  arrow::Status PopArray() override {
+    cached_arr_.pop_back();
+    has_null_ = false;  // NOTE(review): also cleared if remaining arrays hold nulls?
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id) override {
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
+        cached_arr_[array_id]->IsNull(item_id)) {
+      builder_->UnsafeAppendNull();
+    } else {
+      builder_->UnsafeAppend(cached_arr_[array_id]->GetView(item_id));
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id,
+                       int repeated) override {
+    if (repeated == 0) return arrow::Status::OK();
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
+        cached_arr_[array_id]->IsNull(item_id)) {
+      // TODO: unloop here and use unsafeappend
+      RETURN_NOT_OK(builder_->AppendNulls(repeated));
+    } else {
+      auto val = cached_arr_[array_id]->GetView(item_id);
+      std::vector<CType> values;
+      values.resize(repeated, val);
+      // TODO: unloop here and use unsafeappend
+      RETURN_NOT_OK(builder_->AppendValues(values.data(), repeated));
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const std::vector<ArrayItemIndex>& index_list) {
+    for (const auto& tmp : index_list) {  // one output slot per (array_id, id) entry
+      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 &&
+          cached_arr_[tmp.array_id]->IsNull(tmp.id)) {
+        RETURN_NOT_OK(builder_->AppendNull());  // checked: surface alloc failures
+      } else {
+        builder_->UnsafeAppend(cached_arr_[tmp.array_id]->GetView(tmp.id));
+      }
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status AppendNull() override {
+    // TODO: use unsafe append
+    return builder_->AppendNull();
+  }
+
+  arrow::Status Finish(std::shared_ptr<arrow::Array>* out_) override {
+    auto status = builder_->Finish(out_);
+    return status;
+  }
+
+  arrow::Status Reserve(uint64_t len) override {
+    // Propagate allocation failure: later UnsafeAppend calls rely on this capacity.
+    return builder_->Reserve(len);
+  }
+
+  arrow::Status Reset() override {
+    builder_->Reset();
+    return arrow::Status::OK();
+  }
+
+ private:
+  using BuilderType_ = typename arrow::TypeTraits<DataType>::BuilderType;
+  using ArrayType_ = typename arrow::TypeTraits<DataType>::ArrayType;
+  using CType = typename arrow::TypeTraits<DataType>::CType;
+  std::unique_ptr<BuilderType_> builder_;
+  std::vector<std::shared_ptr<ArrayType_>> cached_arr_;
+  arrow::compute::ExecContext* ctx_;
+  AppenderType type_;
+  bool has_null_ = false;
+};
+
+// TODO(): this is a fake unsafe appender for string array
+template <typename DataType>
+class UnsafeArrayAppender<DataType, arrow::enable_if_string_like<DataType>>
+    : public AppenderBase {
+ public:
+  UnsafeArrayAppender(arrow::compute::ExecContext* ctx, AppenderType type = left)
+      : ctx_(ctx), type_(type) {
+    std::unique_ptr<arrow::ArrayBuilder> array_builder;
+    arrow::MakeBuilder(ctx_->memory_pool(), arrow::TypeTraits<DataType>::type_singleton(),
+                       &array_builder);
+    builder_.reset(arrow::internal::checked_cast<BuilderType_*>(array_builder.release()));
+  }
+  ~UnsafeArrayAppender() {}
+
+  AppenderType GetType() override { return type_; }
+  arrow::Status AddArray(const std::shared_ptr<arrow::Array>& arr) override {
+    auto typed_arr_ = std::dynamic_pointer_cast<ArrayType_>(arr);
+    cached_arr_.emplace_back(typed_arr_);
+    if (typed_arr_->null_count() > 0) has_null_ = true;
+    return arrow::Status::OK();
+  }
+
+  arrow::Status PopArray() override {
+    cached_arr_.pop_back();
+    has_null_ = false;  // NOTE(review): also cleared if remaining arrays hold nulls?
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id) override {
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
+        cached_arr_[array_id]->IsNull(item_id)) {
+      RETURN_NOT_OK(builder_->AppendNull());
+    } else {
+      RETURN_NOT_OK(builder_->Append(cached_arr_[array_id]->GetView(item_id)));
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id,
+                       int repeated) override {
+    if (repeated == 0) return arrow::Status::OK();
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
+        cached_arr_[array_id]->IsNull(item_id)) {
+      RETURN_NOT_OK(builder_->AppendNulls(repeated));
+    } else {
+      auto val = cached_arr_[array_id]->GetView(item_id);
+      for (int i = 0; i < repeated; i++) {
+        RETURN_NOT_OK(builder_->Append(val));
+      }
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const std::vector<ArrayItemIndex>& index_list) {
+    for (auto tmp : index_list) {
+      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 &&
+          cached_arr_[tmp.array_id]->IsNull(tmp.id)) {
+        RETURN_NOT_OK(builder_->AppendNull());
+      } else {
+        RETURN_NOT_OK(builder_->Append(cached_arr_[tmp.array_id]->GetView(tmp.id)));
+      }
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status AppendNull() override { return builder_->AppendNull(); }
+
+  arrow::Status Finish(std::shared_ptr<arrow::Array>* out_) override {
+    auto status = builder_->Finish(out_);
+    return status;
+  }
+
+  arrow::Status Reserve(uint64_t len) override {
+    // No-op (builder_->Reserve(len) disabled): this class only uses checked appends.
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Reset() override {
+    builder_->Reset();
+    return arrow::Status::OK();
+  }
+
+ private:
+  using BuilderType_ = typename arrow::TypeTraits<DataType>::BuilderType;
+  using ArrayType_ = typename arrow::TypeTraits<DataType>::ArrayType;
+  std::unique_ptr<BuilderType_> builder_;
+  std::vector<std::shared_ptr<ArrayType_>> cached_arr_;
+  arrow::compute::ExecContext* ctx_;
+  AppenderType type_;
+  bool has_null_ = false;
+};
+
+// TODO(): this is a fake unsafe appender for boolean array
+template <typename DataType>
+class UnsafeArrayAppender<DataType, arrow::enable_if_boolean<DataType>>
+    : public AppenderBase {
+ public:
+  UnsafeArrayAppender(arrow::compute::ExecContext* ctx, AppenderType type = left)
+      : ctx_(ctx), type_(type) {
+    std::unique_ptr<arrow::ArrayBuilder> array_builder;
+    arrow::MakeBuilder(ctx_->memory_pool(), arrow::TypeTraits<DataType>::type_singleton(),
+                       &array_builder);
+    builder_.reset(arrow::internal::checked_cast<BuilderType_*>(array_builder.release()));
+  }
+  ~UnsafeArrayAppender() {}
+
+  AppenderType GetType() override { return type_; }
+  arrow::Status AddArray(const std::shared_ptr<arrow::Array>& arr) override {
+    auto typed_arr_ = std::dynamic_pointer_cast<ArrayType_>(arr);
+    cached_arr_.emplace_back(typed_arr_);
+    if (typed_arr_->null_count() > 0) has_null_ = true;
+    return arrow::Status::OK();
+  }
+
+  arrow::Status PopArray() override {
+    cached_arr_.pop_back();
+    has_null_ = false;  // NOTE(review): also cleared if remaining arrays hold nulls?
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id) override {
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
+        cached_arr_[array_id]->IsNull(item_id)) {
+      RETURN_NOT_OK(builder_->AppendNull());
+    } else {
+      RETURN_NOT_OK(builder_->Append(cached_arr_[array_id]->GetView(item_id)));
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id,
+                       int repeated) override {
+    if (repeated == 0) return arrow::Status::OK();
+    if (has_null_ && cached_arr_[array_id]->null_count() > 0 &&
+        cached_arr_[array_id]->IsNull(item_id)) {
+      RETURN_NOT_OK(builder_->AppendNulls(repeated));
+    } else {
+      auto val = cached_arr_[array_id]->GetView(item_id);
+      for (int i = 0; i < repeated; i++) {
+        RETURN_NOT_OK(builder_->Append(val));
+      }
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const std::vector<ArrayItemIndex>& index_list) {
+    for (auto tmp : index_list) {
+      if (has_null_ && cached_arr_[tmp.array_id]->null_count() > 0 &&
+          cached_arr_[tmp.array_id]->IsNull(tmp.id)) {
+        RETURN_NOT_OK(builder_->AppendNull());
+      } else {
+        RETURN_NOT_OK(builder_->Append(cached_arr_[tmp.array_id]->GetView(tmp.id)));
+      }
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status AppendNull() override { return builder_->AppendNull(); }
+
+  arrow::Status AppendExistence(bool is_exist) { return builder_->Append(is_exist); }
+
+  arrow::Status Finish(std::shared_ptr<arrow::Array>* out_) override {
+    auto status = builder_->Finish(out_);
+    return status;
+  }
+
+  arrow::Status Reserve(uint64_t len) override {
+    // No-op (builder_->Reserve(len) disabled): this class only uses checked appends.
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Reset() override {
+    builder_->Reset();
+    return arrow::Status::OK();
+  }
+
+ private:
+  using BuilderType_ = typename arrow::TypeTraits<DataType>::BuilderType;
+  using ArrayType_ = typename arrow::TypeTraits<DataType>::ArrayType;
+  std::unique_ptr<BuilderType_> builder_;
+  std::vector<std::shared_ptr<ArrayType_>> cached_arr_;
+  arrow::compute::ExecContext* ctx_;
+  AppenderType type_;
+  bool has_null_ = false;
+};
+
+template <typename DataType>
+class UnsafeArrayAppender<DataType, enable_if_decimal<DataType>> : public AppenderBase {
+ public:
+  UnsafeArrayAppender(arrow::compute::ExecContext* ctx,
+                      std::shared_ptr<arrow::DataType> data_type,
+                      AppenderType type = left)
+      : ctx_(ctx), type_(type) {
+    std::unique_ptr<arrow::ArrayBuilder> array_builder;
+    arrow::MakeBuilder(ctx_->memory_pool(), data_type, &array_builder);
+    builder_.reset(arrow::internal::checked_cast<BuilderType_*>(array_builder.release()));
+  }
+  ~UnsafeArrayAppender() {}
+
+  AppenderType GetType() override { return type_; }
+  arrow::Status AddArray(const std::shared_ptr<arrow::Array>& arr) override {
+    auto typed_arr_ = std::dynamic_pointer_cast<ArrayType_>(arr);
+    cached_arr_.emplace_back(typed_arr_);
+    if (typed_arr_->null_count() > 0) has_null_ = true;
+    return arrow::Status::OK();
+  }
+
+  arrow::Status PopArray() override {
+    cached_arr_.pop_back();
+    has_null_ = false;  // NOTE(review): also cleared if remaining arrays hold nulls?
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id) override {
+    if (has_null_ && cached_arr_[array_id]->IsNull(item_id)) {
+      builder_->UnsafeAppendNull();
+    } else {
+      builder_->UnsafeAppend(cached_arr_[array_id]->GetView(item_id));
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const uint16_t& array_id, const uint16_t& item_id,
+                       int repeated) override {
+    if (repeated == 0) return arrow::Status::OK();
+    if (has_null_ && cached_arr_[array_id]->IsNull(item_id)) {
+      RETURN_NOT_OK(builder_->AppendNulls(repeated));
+    } else {
+      auto val = cached_arr_[array_id]->GetView(item_id);
+      for (int i = 0; i < repeated; i++) {
+        RETURN_NOT_OK(builder_->Append(val));
+      }
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status Append(const std::vector<ArrayItemIndex>& index_list) {
+    for (auto tmp : index_list) {
+      if (has_null_ && cached_arr_[tmp.array_id]->IsNull(tmp.id)) {
+        RETURN_NOT_OK(builder_->AppendNull());
+      } else {
+        RETURN_NOT_OK(builder_->Append(cached_arr_[tmp.array_id]->GetView(tmp.id)));
+      }
+    }
+    return arrow::Status::OK();
+  }
+
+  arrow::Status AppendNull() override { return builder_->AppendNull(); }
+
+  arrow::Status Finish(std::shared_ptr<arrow::Array>* out_) override {
+    auto status = builder_->Finish(out_);
+    return status;
+  }
+
+  arrow::Status Reserve(uint64_t len) override {
+    // Propagate allocation failure: later UnsafeAppend calls rely on this capacity.
+    return builder_->Reserve(len);
+  }
+
+  arrow::Status Reset() override {
+    builder_->Reset();
+    return arrow::Status::OK();
+  }
+
+ private:
+  using BuilderType_ = typename arrow::TypeTraits<DataType>::BuilderType;
+  using ArrayType_ = typename arrow::TypeTraits<DataType>::ArrayType;
+  std::unique_ptr<BuilderType_> builder_;
+  std::vector<std::shared_ptr<ArrayType_>> cached_arr_;
+  arrow::compute::ExecContext* ctx_;
+  AppenderType type_;
+  bool has_null_ = false;
+};
+
+#define PROCESS_SUPPORTED_TYPES(PROCESS) \
+  PROCESS(arrow::BooleanType)            \
+  PROCESS(arrow::UInt8Type)              \
+  PROCESS(arrow::Int8Type)               \
+  PROCESS(arrow::UInt16Type)             \
+  PROCESS(arrow::Int16Type)              \
+  PROCESS(arrow::UInt32Type)             \
+  PROCESS(arrow::Int32Type)              \
+  PROCESS(arrow::UInt64Type)             \
+  PROCESS(arrow::Int64Type)              \
+  PROCESS(arrow::FloatType)              \
+  PROCESS(arrow::DoubleType)             \
+  PROCESS(arrow::Date32Type)             \
+  PROCESS(arrow::Date64Type)             \
+  PROCESS(arrow::StringType)
+// Creates the UnsafeArrayAppender matching `type`; NotImplemented if unsupported.
+static arrow::Status MakeUnsafeAppender(arrow::compute::ExecContext* ctx,
+                                        std::shared_ptr<arrow::DataType> type,
+                                        AppenderBase::AppenderType appender_type,
+                                        std::shared_ptr<AppenderBase>* out) {
+  switch (type->id()) {
+#define PROCESS(InType)                                                               \
+  case InType::type_id: {                                                             \
+    auto app_ptr = std::make_shared<UnsafeArrayAppender<InType>>(ctx, appender_type); \
+    *out = std::dynamic_pointer_cast<AppenderBase>(app_ptr);                          \
+  } break;
+    PROCESS_SUPPORTED_TYPES(PROCESS)
+#undef PROCESS
+    case arrow::Decimal128Type::type_id: {
+      auto app_ptr = std::make_shared<UnsafeArrayAppender<arrow::Decimal128Type>>(
+          ctx, type, appender_type);
+      *out = std::dynamic_pointer_cast<AppenderBase>(app_ptr);
+    } break;
+    default:
+      return arrow::Status::NotImplemented(
+          "MakeUnsafeAppender type not supported, type is ", type->ToString());
+  }
+  return arrow::Status::OK();
+}
+#undef PROCESS_SUPPORTED_TYPES
+
 }  // namespace extra
 }  // namespace arrowcompute
 }  // namespace codegen
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc
index 221b2c48b..ba4b430cc 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/basic_physical_kernels.cc
@@ -47,8 +47,7 @@ using ArrayList = std::vector<std::shared_ptr<arrow::Array>>;
 ///////////////  Project  ////////////////
 class ProjectKernel::Impl {
  public:
-  Impl(arrow::compute::ExecContext* ctx,
-       const gandiva::NodeVector& input_field_node_list,
+  Impl(arrow::compute::ExecContext* ctx, const gandiva::NodeVector& input_field_node_list,
        const gandiva::NodeVector& project_list)
       : ctx_(ctx), project_list_(project_list) {
     for (auto node : input_field_node_list) {
@@ -147,8 +146,7 @@ arrow::Status ProjectKernel::DoCodeGen(
 ///////////////  Filter  ////////////////
 class FilterKernel::Impl {
  public:
-  Impl(arrow::compute::ExecContext* ctx,
-       const gandiva::NodeVector& input_field_node_list,
+  Impl(arrow::compute::ExecContext* ctx, const gandiva::NodeVector& input_field_node_list,
        const gandiva::NodePtr& condition)
       : ctx_(ctx), condition_(condition) {
     for (auto node : input_field_node_list) {
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/cmp_function.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/cmp_function.h
index 628305bf6..7a07511db 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/cmp_function.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/cmp_function.h
@@ -17,8 +17,9 @@
 
 #pragma once
 
-#include <arrow/type.h>
 #include <arrow/array.h>
+#include <arrow/type.h>
+
 #include "precompile/array.h"
 
 namespace sparkcolumnarplugin {
@@ -45,8 +46,8 @@ class TypedComparator {
     }
     if (null_total == 0) {
       if (asc) {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
           CType left = typed_arrays[left_array_id]->GetView(left_id);
           CType right = typed_arrays[right_array_id]->GetView(right_id);
           if (left != right) {
@@ -54,8 +55,8 @@ class TypedComparator {
           }
         };
       } else {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
           CType left = typed_arrays[left_array_id]->GetView(left_id);
           CType right = typed_arrays[right_array_id]->GetView(right_id);
           if (left != right) {
@@ -65,12 +66,12 @@ class TypedComparator {
       }
     } else if (asc) {
       if (nulls_first) {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 1;
@@ -86,12 +87,12 @@ class TypedComparator {
           }
         };
       } else {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 0;
@@ -108,12 +109,12 @@ class TypedComparator {
         };
       }
     } else if (nulls_first) {
-      return [=](int left_array_id, int right_array_id, 
-                 int64_t left_id, int64_t right_id, int& cmp_res) {
-        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-            typed_arrays[left_array_id]->IsNull(left_id);
+      return [=](int left_array_id, int right_array_id, int64_t left_id, int64_t right_id,
+                 int& cmp_res) {
+        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                            typed_arrays[left_array_id]->IsNull(left_id);
         bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-            typed_arrays[right_array_id]->IsNull(right_id);
+                             typed_arrays[right_array_id]->IsNull(right_id);
         if (!is_left_null || !is_right_null) {
           if (is_left_null) {
             cmp_res = 1;
@@ -129,12 +130,12 @@ class TypedComparator {
         }
       };
     } else {
-      return [=](int left_array_id, int right_array_id, 
-                 int64_t left_id, int64_t right_id, int& cmp_res) {
-        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-            typed_arrays[left_array_id]->IsNull(left_id);
+      return [=](int left_array_id, int right_array_id, int64_t left_id, int64_t right_id,
+                 int& cmp_res) {
+        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                            typed_arrays[left_array_id]->IsNull(left_id);
         bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-            typed_arrays[right_array_id]->IsNull(right_id);
+                             typed_arrays[right_array_id]->IsNull(right_id);
         if (!is_left_null || !is_right_null) {
           if (is_left_null) {
             cmp_res = 0;
@@ -176,8 +177,8 @@ class FloatingComparator {
       if (asc) {
         if (nan_check) {
           // null_total == 0, asc, nan_check
-          return [=](int left_array_id, int right_array_id, 
-                     int64_t left_id, int64_t right_id, int& cmp_res) {
+          return [=](int left_array_id, int right_array_id, int64_t left_id,
+                     int64_t right_id, int& cmp_res) {
             CType left = typed_arrays[left_array_id]->GetView(left_id);
             CType right = typed_arrays[right_array_id]->GetView(right_id);
             bool is_left_nan = std::isnan(left);
@@ -196,20 +197,20 @@ class FloatingComparator {
           };
         } else {
           // null_total == 0, asc, !nan_check
-          return [=](int left_array_id, int right_array_id, 
-                     int64_t left_id, int64_t right_id, int& cmp_res) {
+          return [=](int left_array_id, int right_array_id, int64_t left_id,
+                     int64_t right_id, int& cmp_res) {
             CType left = typed_arrays[left_array_id]->GetView(left_id);
             CType right = typed_arrays[right_array_id]->GetView(right_id);
             if (left != right) {
               cmp_res = left < right;
             }
-          };          
+          };
         }
       } else {
         if (nan_check) {
           // null_total == 0, desc, nan_check
-          return [=](int left_array_id, int right_array_id, 
-                     int64_t left_id, int64_t right_id, int& cmp_res) {
+          return [=](int left_array_id, int right_array_id, int64_t left_id,
+                     int64_t right_id, int& cmp_res) {
             CType left = typed_arrays[left_array_id]->GetView(left_id);
             CType right = typed_arrays[right_array_id]->GetView(right_id);
             bool is_left_nan = std::isnan(left);
@@ -228,26 +229,26 @@ class FloatingComparator {
           };
         } else {
           // null_total == 0, desc, !nan_check
-          return [=](int left_array_id, int right_array_id, 
-                     int64_t left_id, int64_t right_id, int& cmp_res) {
+          return [=](int left_array_id, int right_array_id, int64_t left_id,
+                     int64_t right_id, int& cmp_res) {
             CType left = typed_arrays[left_array_id]->GetView(left_id);
             CType right = typed_arrays[right_array_id]->GetView(right_id);
             if (left != right) {
               cmp_res = left > right;
             }
-          };          
+          };
         }
       }
     } else if (asc) {
       if (nulls_first) {
         if (nan_check) {
           // nulls_first, asc, nan_check
-          return [=](int left_array_id, int right_array_id, 
-                    int64_t left_id, int64_t right_id, int& cmp_res) {
-            bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-                typed_arrays[left_array_id]->IsNull(left_id);
+          return [=](int left_array_id, int right_array_id, int64_t left_id,
+                     int64_t right_id, int& cmp_res) {
+            bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                                typed_arrays[left_array_id]->IsNull(left_id);
             bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-                typed_arrays[right_array_id]->IsNull(right_id);
+                                 typed_arrays[right_array_id]->IsNull(right_id);
             if (!is_left_null || !is_right_null) {
               if (is_left_null) {
                 cmp_res = 1;
@@ -274,12 +275,12 @@ class FloatingComparator {
           };
         } else {
           // nulls_first, asc, !nan_check
-          return [=](int left_array_id, int right_array_id, 
-                    int64_t left_id, int64_t right_id, int& cmp_res) {
-            bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-                typed_arrays[left_array_id]->IsNull(left_id);
+          return [=](int left_array_id, int right_array_id, int64_t left_id,
+                     int64_t right_id, int& cmp_res) {
+            bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                                typed_arrays[left_array_id]->IsNull(left_id);
             bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-                typed_arrays[right_array_id]->IsNull(right_id);
+                                 typed_arrays[right_array_id]->IsNull(right_id);
             if (!is_left_null || !is_right_null) {
               if (is_left_null) {
                 cmp_res = 1;
@@ -298,12 +299,12 @@ class FloatingComparator {
       } else {
         if (nan_check) {
           // nulls_last, asc, nan_check
-          return [=](int left_array_id, int right_array_id, 
-                    int64_t left_id, int64_t right_id, int& cmp_res) {
-            bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-                typed_arrays[left_array_id]->IsNull(left_id);
+          return [=](int left_array_id, int right_array_id, int64_t left_id,
+                     int64_t right_id, int& cmp_res) {
+            bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                                typed_arrays[left_array_id]->IsNull(left_id);
             bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-                typed_arrays[right_array_id]->IsNull(right_id);
+                                 typed_arrays[right_array_id]->IsNull(right_id);
             if (!is_left_null || !is_right_null) {
               if (is_left_null) {
                 cmp_res = 0;
@@ -330,12 +331,12 @@ class FloatingComparator {
           };
         } else {
           // nulls_last, asc, !nan_check
-          return [=](int left_array_id, int right_array_id, 
-                    int64_t left_id, int64_t right_id, int& cmp_res) {
-            bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-                typed_arrays[left_array_id]->IsNull(left_id);
+          return [=](int left_array_id, int right_array_id, int64_t left_id,
+                     int64_t right_id, int& cmp_res) {
+            bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                                typed_arrays[left_array_id]->IsNull(left_id);
             bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-                typed_arrays[right_array_id]->IsNull(right_id);
+                                 typed_arrays[right_array_id]->IsNull(right_id);
             if (!is_left_null || !is_right_null) {
               if (is_left_null) {
                 cmp_res = 0;
@@ -355,12 +356,12 @@ class FloatingComparator {
     } else if (nulls_first) {
       if (nan_check) {
         // nulls_first, desc, nan_check
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 1;
@@ -387,12 +388,12 @@ class FloatingComparator {
         };
       } else {
         // nulls_first, desc, !nan_check
-        return [=](int left_array_id, int right_array_id, 
-                  int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 1;
@@ -407,16 +408,16 @@ class FloatingComparator {
             }
           }
         };
-      }      
+      }
     } else {
       if (nan_check) {
         // nulls_last, desc, nan_check
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 0;
@@ -443,12 +444,12 @@ class FloatingComparator {
         };
       } else {
         // nulls_last, desc, !nan_check
-        return [=](int left_array_id, int right_array_id, 
-                  int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 0;
@@ -488,8 +489,8 @@ class StringComparator {
     }
     if (null_total == 0) {
       if (asc) {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
           std::string left = typed_arrays[left_array_id]->GetString(left_id);
           std::string right = typed_arrays[right_array_id]->GetString(right_id);
           if (left != right) {
@@ -497,8 +498,8 @@ class StringComparator {
           }
         };
       } else {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
           std::string left = typed_arrays[left_array_id]->GetString(left_id);
           std::string right = typed_arrays[right_array_id]->GetString(right_id);
           if (left != right) {
@@ -508,12 +509,12 @@ class StringComparator {
       }
     } else if (asc) {
       if (nulls_first) {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 1;
@@ -529,12 +530,12 @@ class StringComparator {
           }
         };
       } else {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 0;
@@ -551,12 +552,12 @@ class StringComparator {
         };
       }
     } else if (nulls_first) {
-      return [=](int left_array_id, int right_array_id, 
-                 int64_t left_id, int64_t right_id, int& cmp_res) {
-        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-            typed_arrays[left_array_id]->IsNull(left_id);
+      return [=](int left_array_id, int right_array_id, int64_t left_id, int64_t right_id,
+                 int& cmp_res) {
+        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                            typed_arrays[left_array_id]->IsNull(left_id);
         bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-            typed_arrays[right_array_id]->IsNull(right_id);
+                             typed_arrays[right_array_id]->IsNull(right_id);
         if (!is_left_null || !is_right_null) {
           if (is_left_null) {
             cmp_res = 1;
@@ -572,12 +573,12 @@ class StringComparator {
         }
       };
     } else {
-      return [=](int left_array_id, int right_array_id, 
-                 int64_t left_id, int64_t right_id, int& cmp_res) {
-        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-            typed_arrays[left_array_id]->IsNull(left_id);
+      return [=](int left_array_id, int right_array_id, int64_t left_id, int64_t right_id,
+                 int& cmp_res) {
+        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                            typed_arrays[left_array_id]->IsNull(left_id);
         bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-            typed_arrays[right_array_id]->IsNull(right_id);
+                             typed_arrays[right_array_id]->IsNull(right_id);
         if (!is_left_null || !is_right_null) {
           if (is_left_null) {
             cmp_res = 0;
@@ -613,8 +614,8 @@ class DecimalComparator {
     }
     if (null_total == 0) {
       if (asc) {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
           arrow::Decimal128 left = typed_arrays[left_array_id]->GetView(left_id);
           arrow::Decimal128 right = typed_arrays[right_array_id]->GetView(right_id);
           if (left != right) {
@@ -622,8 +623,8 @@ class DecimalComparator {
           }
         };
       } else {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
           arrow::Decimal128 left = typed_arrays[left_array_id]->GetView(left_id);
           arrow::Decimal128 right = typed_arrays[right_array_id]->GetView(right_id);
           if (left != right) {
@@ -633,12 +634,12 @@ class DecimalComparator {
       }
     } else if (asc) {
       if (nulls_first) {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 1;
@@ -654,12 +655,12 @@ class DecimalComparator {
           }
         };
       } else {
-        return [=](int left_array_id, int right_array_id, 
-                   int64_t left_id, int64_t right_id, int& cmp_res) {
-          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-              typed_arrays[left_array_id]->IsNull(left_id);
+        return [=](int left_array_id, int right_array_id, int64_t left_id,
+                   int64_t right_id, int& cmp_res) {
+          bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                              typed_arrays[left_array_id]->IsNull(left_id);
           bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-              typed_arrays[right_array_id]->IsNull(right_id);
+                               typed_arrays[right_array_id]->IsNull(right_id);
           if (!is_left_null || !is_right_null) {
             if (is_left_null) {
               cmp_res = 0;
@@ -676,12 +677,12 @@ class DecimalComparator {
         };
       }
     } else if (nulls_first) {
-      return [=](int left_array_id, int right_array_id, 
-                 int64_t left_id, int64_t right_id, int& cmp_res) {
-        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-            typed_arrays[left_array_id]->IsNull(left_id);
+      return [=](int left_array_id, int right_array_id, int64_t left_id, int64_t right_id,
+                 int& cmp_res) {
+        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                            typed_arrays[left_array_id]->IsNull(left_id);
         bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-            typed_arrays[right_array_id]->IsNull(right_id);
+                             typed_arrays[right_array_id]->IsNull(right_id);
         if (!is_left_null || !is_right_null) {
           if (is_left_null) {
             cmp_res = 1;
@@ -697,12 +698,12 @@ class DecimalComparator {
         }
       };
     } else {
-      return [=](int left_array_id, int right_array_id, 
-                 int64_t left_id, int64_t right_id, int& cmp_res) {
-        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 && 
-            typed_arrays[left_array_id]->IsNull(left_id);
+      return [=](int left_array_id, int right_array_id, int64_t left_id, int64_t right_id,
+                 int& cmp_res) {
+        bool is_left_null = typed_arrays[left_array_id]->null_count() > 0 &&
+                            typed_arrays[left_array_id]->IsNull(left_id);
         bool is_right_null = typed_arrays[right_array_id]->null_count() > 0 &&
-            typed_arrays[right_array_id]->IsNull(right_id);
+                             typed_arrays[right_array_id]->IsNull(right_id);
         if (!is_left_null || !is_right_null) {
           if (is_left_null) {
             cmp_res = 0;
@@ -736,10 +737,8 @@ class DecimalComparator {
 static arrow::Status MakeCmpFunction(
     const std::vector<arrow::ArrayVector>& array_vectors,
     const std::vector<std::shared_ptr<arrow::Field>>& key_field_list,
-    const std::vector<int>& key_index_list,
-    const std::vector<bool>& sort_directions, 
-    const std::vector<bool>& nulls_order,
-    const bool& nan_check,
+    const std::vector<int>& key_index_list, const std::vector<bool>& sort_directions,
+    const std::vector<bool>& nulls_order, const bool& nan_check,
     std::vector<std::function<void(int, int, int64_t, int64_t, int&)>>& cmp_functions) {
   for (int i = 0; i < key_field_list.size(); i++) {
     auto type = key_field_list[i]->type();
@@ -749,35 +748,33 @@ static arrow::Status MakeCmpFunction(
     bool nulls_first = nulls_order[i];
     if (type->id() == arrow::Type::STRING) {
       auto comparator_ptr = std::make_shared<StringComparator>();
-      cmp_functions.push_back(
-          comparator_ptr->GetCompareFunc(col, asc, nulls_first));
+      cmp_functions.push_back(comparator_ptr->GetCompareFunc(col, asc, nulls_first));
     } else if (type->id() == arrow::Type::DOUBLE) {
-      auto comparator_ptr = 
+      auto comparator_ptr =
           std::make_shared<FloatingComparator<arrow::DoubleType, double>>();
       cmp_functions.push_back(
           comparator_ptr->GetCompareFunc(col, asc, nulls_first, nan_check));
     } else if (type->id() == arrow::Type::FLOAT) {
-      auto comparator_ptr = 
+      auto comparator_ptr =
           std::make_shared<FloatingComparator<arrow::FloatType, float>>();
       cmp_functions.push_back(
           comparator_ptr->GetCompareFunc(col, asc, nulls_first, nan_check));
     } else if (type->id() == arrow::Type::DECIMAL128) {
       auto comparator_ptr = std::make_shared<DecimalComparator>();
-      cmp_functions.push_back(
-          comparator_ptr->GetCompareFunc(col, asc, nulls_first));
+      cmp_functions.push_back(comparator_ptr->GetCompareFunc(col, asc, nulls_first));
     } else {
       switch (type->id()) {
-  #define PROCESS(InType)                                                           \
-      case InType::type_id: {                                                       \
-        using CType = typename arrow::TypeTraits<InType>::CType;                    \
-        auto comparator_ptr = std::make_shared<TypedComparator<InType, CType>>();   \
-        cmp_functions.push_back(comparator_ptr->GetCompareFunc(col, asc, nulls_first));\
-      } break;
+#define PROCESS(InType)                                                             \
+  case InType::type_id: {                                                           \
+    using CType = typename arrow::TypeTraits<InType>::CType;                        \
+    auto comparator_ptr = std::make_shared<TypedComparator<InType, CType>>();       \
+    cmp_functions.push_back(comparator_ptr->GetCompareFunc(col, asc, nulls_first)); \
+  } break;
         PROCESS_SUPPORTED_TYPES(PROCESS)
-  #undef PROCESS
+#undef PROCESS
         default: {
-          std::cout << "MakeCmpFunction type not supported, type is " 
-                    << type << std::endl;
+          std::cout << "MakeCmpFunction type not supported, type is " << type
+                    << std::endl;
         } break;
       }
     }
@@ -786,7 +783,6 @@ static arrow::Status MakeCmpFunction(
 }
 #undef PROCESS_SUPPORTED_TYPES
 
-
 }  // namespace extra
 }  // namespace arrowcompute
 }  // namespace codegen
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/code_generator_base.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/code_generator_base.h
index 1f8da6162..d66c5f2de 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/code_generator_base.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/code_generator_base.h
@@ -46,8 +46,10 @@ class CodeGenBase {
     return arrow::Status::NotImplemented("CodeGenBase Finish is an abstract interface.");
   }
 
-  virtual arrow::Status FinishInternal(std::shared_ptr<precompile::FixedSizeBinaryArray>* out) {
-    return arrow::Status::NotImplemented("CodeGenBase FinishInternal is an abstract interface.");
+  virtual arrow::Status FinishInternal(
+      std::shared_ptr<precompile::FixedSizeBinaryArray>* out) {
+    return arrow::Status::NotImplemented(
+        "CodeGenBase FinishInternal is an abstract interface.");
   }
 
   virtual arrow::Status MakeResultIterator(
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_node_visitor.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_node_visitor.cc
index be38655c9..309f2fd5a 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_node_visitor.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_node_visitor.cc
@@ -81,7 +81,8 @@ arrow::Status CodeGenNodeVisitor::ProduceGandivaFunction() {
     } break;
     default:
       return arrow::Status::NotImplemented(
-          "Unable to support function whose chidren both from left and right Or "
+          "Unable to support function whose chidren both from left and right "
+          "Or "
           "Unknown.");
   }
   prepare_str_ = prepare_ss.str();
@@ -121,7 +122,8 @@ arrow::Status CodeGenNodeVisitor::Visit(const gandiva::FunctionNode& node) {
                                                     "isnotnull"};
   auto func_name = node.descriptor()->name();
   auto input_list = input_list_;
-  if (func_name.compare(0, 7, "action_") != 0 && func_name.find("cast") == std::string::npos &&
+  if (func_name.compare(0, 7, "action_") != 0 &&
+      func_name.find("cast") == std::string::npos &&
       std::find(non_gandiva_func_list.begin(), non_gandiva_func_list.end(), func_name) ==
           non_gandiva_func_list.end()) {
     input_list = nullptr;
@@ -135,8 +137,8 @@ arrow::Status CodeGenNodeVisitor::Visit(const gandiva::FunctionNode& node) {
       RETURN_NOT_OK(
           MakeCodeGenNodeVisitor(child, field_list_v_[0], action_impl_, &child_visitor));
     } else {
-      // When set input_list as nullptr, MakeCodeGenNodeVisitor only check its children's
-      // field_type won't add codes.
+      // When input_list is set to nullptr, MakeCodeGenNodeVisitor only checks
+      // its children's field_type and won't add codes.
       RETURN_NOT_OK(MakeCodeGenNodeVisitor(child, field_list_v_, func_count_, input_list,
                                            left_indices_, right_indices_, project_list_,
                                            &child_visitor));
@@ -339,7 +341,7 @@ arrow::Status CodeGenNodeVisitor::Visit(const gandiva::FunctionNode& node) {
       }
       prepare_str_ += prepare_ss.str();
       check_str_ = validity;
-    } else if (func_name.compare("multiply")  == 0) {
+    } else if (func_name.compare("multiply") == 0) {
       codes_str_ = "multiply_" + std::to_string(cur_func_id);
       auto validity = "multiply_validity_" + std::to_string(cur_func_id);
       std::stringstream prepare_ss;
@@ -358,48 +360,49 @@ arrow::Status CodeGenNodeVisitor::Visit(const gandiva::FunctionNode& node) {
       prepare_str_ += prepare_ss.str();
       check_str_ = validity;
     } else if (func_name.compare("divide") == 0) {
-        codes_str_ = "divide_" + std::to_string(cur_func_id);
-        auto validity = codes_str_ + "_validity";
-        std::stringstream fix_ss;
-        if (node.return_type()->id() != arrow::Type::DECIMAL) {
-          fix_ss << child_visitor_list[0]->GetResult() << " / "
-                << child_visitor_list[1]->GetResult();
-        } else {
-          auto leftNode = node.children().at(0);
-          auto rightNode = node.children().at(1);
-          auto leftType =
-              std::dynamic_pointer_cast<arrow::Decimal128Type>(leftNode->return_type());
-          auto rightType =
-              std::dynamic_pointer_cast<arrow::Decimal128Type>(rightNode->return_type());
-          auto resType = std::dynamic_pointer_cast<arrow::Decimal128Type>(node.return_type());
-          fix_ss << "divide(" << child_visitor_list[0]->GetResult() << ", "
-                << leftType->precision() << ", " << leftType->scale() << ", "
-                << child_visitor_list[1]->GetResult() << ", " << rightType->precision()
-                << ", " << rightType->scale() << ", " << resType->precision() << ", "
-                << resType->scale() << ", &overflow)";
-        }
-        std::stringstream prepare_ss;
-        prepare_ss << GetCTypeString(node.return_type()) << " " << codes_str_ << ";"
-                  << std::endl;
-        prepare_ss << "bool " << validity << " = ("
-                  << CombineValidity({child_visitor_list[0]->GetPreCheck(),
-                                      child_visitor_list[1]->GetPreCheck()})
-                  << ");" << std::endl;
-        prepare_ss << "if (" << validity << ") {" << std::endl;
-        if (node.return_type()->id() == arrow::Type::DECIMAL) {
-          prepare_ss << "bool overflow = false;" << std::endl;
-        }
-        prepare_ss << codes_str_ << " = " << fix_ss.str() << ";" << std::endl;
-        if (node.return_type()->id() == arrow::Type::DECIMAL) {
-          prepare_ss << "if (overflow) {\n" << validity << " = false;}" << std::endl;
-        }
-        prepare_ss << "}" << std::endl;
+      codes_str_ = "divide_" + std::to_string(cur_func_id);
+      auto validity = codes_str_ + "_validity";
+      std::stringstream fix_ss;
+      if (node.return_type()->id() != arrow::Type::DECIMAL) {
+        fix_ss << child_visitor_list[0]->GetResult() << " / "
+               << child_visitor_list[1]->GetResult();
+      } else {
+        auto leftNode = node.children().at(0);
+        auto rightNode = node.children().at(1);
+        auto leftType =
+            std::dynamic_pointer_cast<arrow::Decimal128Type>(leftNode->return_type());
+        auto rightType =
+            std::dynamic_pointer_cast<arrow::Decimal128Type>(rightNode->return_type());
+        auto resType =
+            std::dynamic_pointer_cast<arrow::Decimal128Type>(node.return_type());
+        fix_ss << "divide(" << child_visitor_list[0]->GetResult() << ", "
+               << leftType->precision() << ", " << leftType->scale() << ", "
+               << child_visitor_list[1]->GetResult() << ", " << rightType->precision()
+               << ", " << rightType->scale() << ", " << resType->precision() << ", "
+               << resType->scale() << ", &overflow)";
+      }
+      std::stringstream prepare_ss;
+      prepare_ss << GetCTypeString(node.return_type()) << " " << codes_str_ << ";"
+                 << std::endl;
+      prepare_ss << "bool " << validity << " = ("
+                 << CombineValidity({child_visitor_list[0]->GetPreCheck(),
+                                     child_visitor_list[1]->GetPreCheck()})
+                 << ");" << std::endl;
+      prepare_ss << "if (" << validity << ") {" << std::endl;
+      if (node.return_type()->id() == arrow::Type::DECIMAL) {
+        prepare_ss << "bool overflow = false;" << std::endl;
+      }
+      prepare_ss << codes_str_ << " = " << fix_ss.str() << ";" << std::endl;
+      if (node.return_type()->id() == arrow::Type::DECIMAL) {
+        prepare_ss << "if (overflow) {\n" << validity << " = false;}" << std::endl;
+      }
+      prepare_ss << "}" << std::endl;
 
-        for (int i = 0; i < 2; i++) {
-          prepare_str_ += child_visitor_list[i]->GetPrepare();
-        }
-        prepare_str_ += prepare_ss.str();
-        check_str_ = validity;
+      for (int i = 0; i < 2; i++) {
+        prepare_str_ += child_visitor_list[i]->GetPrepare();
+      }
+      prepare_str_ += prepare_ss.str();
+      check_str_ = validity;
     } else {
       RETURN_NOT_OK(ProduceGandivaFunction());
     }
@@ -559,7 +562,7 @@ arrow::Status CodeGenNodeVisitor::Visit(const gandiva::LiteralNode& node) {
   } else if (node.return_type()->id() == arrow::Type::DECIMAL) {
     auto scalar = arrow::util::get<gandiva::DecimalScalar128>(node.holder());
     auto decimal = arrow::Decimal128(scalar.value());
-    prepare_ss << "auto literal_" << cur_func_id << " = " 
+    prepare_ss << "auto literal_" << cur_func_id << " = "
                << "arrow::Decimal128(\"" << decimal.ToString(scalar.scale()) << "\");"
                << std::endl;
     decimal_scale_ = std::to_string(scalar.scale());
@@ -750,19 +753,19 @@ arrow::Status CodeGenNodeVisitor::InsertToIndices(int index, int arg_id,
   return arrow::Status::OK();
 }
 
-std::string CodeGenNodeVisitor::GetNaNCheckStr(std::string left, std::string right, 
+std::string CodeGenNodeVisitor::GetNaNCheckStr(std::string left, std::string right,
                                                std::string func) {
   std::stringstream ss;
   func = " " + func + " ";
-  ss << "((std::isnan(" << left << ") && std::isnan(" << right << ")) ? (1.0 / 0.0" << func << "1.0 / 0.0) : "
+  ss << "((std::isnan(" << left << ") && std::isnan(" << right << ")) ? (1.0 / 0.0"
+     << func << "1.0 / 0.0) : "
      << "(std::isnan(" << left << ")) ? (1.0 / 0.0" << func << right << ") : "
      << "(std::isnan(" << right << ")) ? (" << left << func << "1.0 / 0.0) : "
      << "(" << left << func << right << "))";
   return ss.str();
 }
 
-std::string CodeGenNodeVisitor::CombineValidity(
-    std::vector<std::string> validity_list) {
+std::string CodeGenNodeVisitor::CombineValidity(std::vector<std::string> validity_list) {
   bool first = true;
   std::stringstream out;
   for (auto validity : validity_list) {
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc
index 9efcfad53..7285edc3c 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc
@@ -49,8 +49,7 @@ using ArrayList = std::vector<std::shared_ptr<arrow::Array>>;
 ///////////////  ConditionedProbe  ////////////////
 class ConditionedMergeJoinKernel::Impl {
  public:
-  Impl(arrow::compute::ExecContext* ctx,
-       const gandiva::NodeVector& left_key_node_list,
+  Impl(arrow::compute::ExecContext* ctx, const gandiva::NodeVector& left_key_node_list,
        const gandiva::NodeVector& right_key_node_list,
        const gandiva::NodeVector& left_schema_node_list,
        const gandiva::NodeVector& right_schema_node_list,
@@ -771,7 +770,8 @@ class ConditionedMergeJoinKernel::Impl {
     auto fill_null_name = "is_outer_null_" + relation_id;
     bool use_relation_for_stream = input.empty();
 
-    // define output list here, which will also be defined in class variables definition
+    // define output list here, which will also be defined in class variables
+    // definition
 
     int right_index_shift = 0;
     std::vector<int> left_output_idx_list;
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc
index ecb2fcc4f..ef35c102b 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc
@@ -284,8 +284,9 @@ class ConditionedProbeKernel::Impl {
       prepare_ss << "auto key_" << hash_relation_id_ << "_validity = " << validity_name
                  << ";" << std::endl;
       /*for (auto header : hash_node_visitor->GetHeaders()) {
-        if (std::find(codegen_ctx->header_codes.begin(), codegen_ctx->header_codes.end(),
-                      header) == codegen_ctx->header_codes.end()) {
+        if (std::find(codegen_ctx->header_codes.begin(),
+      codegen_ctx->header_codes.end(), header) ==
+      codegen_ctx->header_codes.end()) {
           codegen_ctx->header_codes.push_back(header);
         }
       }*/
@@ -413,9 +414,9 @@ class ConditionedProbeKernel::Impl {
           std::dynamic_pointer_cast<ResultIterator<HashRelation>>(iter);
       RETURN_NOT_OK(typed_dependent->Next(&hash_relation_));
 
-      // chendi: previous result_schema_index_list design is little tricky, it put
-      // existentce col at the back of all col while exists_index_ may be at middle out
-      // real result. Add two index here.
+      // chendi: previous result_schema_index_list design is a little tricky: it
+      // puts the existence col at the back of all cols, while exist_index_ may
+      // be in the middle of the real result. Add two indices here.
       auto result_schema_length =
           (exist_index_ == -1 || exist_index_ == right_field_list_.size())
               ? result_schema_index_list_.size()
@@ -482,13 +483,14 @@ class ConditionedProbeKernel::Impl {
           } break;
           default:
             return arrow::Status::NotImplemented(
-                "ConditionedProbeArraysTypedImpl only support join type: InnerJoin, "
+                "ConditionedProbeArraysTypedImpl only support join type: "
+                "InnerJoin, "
                 "RightJoin");
         }
       } else {
         // if hash_map_type == 0, we use TypedHashRelation
-        // when hash_map_type == 0, we won't check actual value ifEqual, this code block
-        // will be removed in near future
+        // when hash_map_type == 0, we won't check actual value ifEqual, this
+        // code block will be removed in near future
         switch (key_type_->id()) {
 #define PROCESS(InType)                                                                  \
   case InType::type_id: {                                                                \
@@ -520,14 +522,16 @@ class ConditionedProbeKernel::Impl {
       } break;                                                                           \
       default:                                                                           \
         return arrow::Status::NotImplemented(                                            \
-            "ConditionedProbeArraysTypedImpl only support join type: InnerJoin, "        \
+            "ConditionedProbeArraysTypedImpl only support join type: "                   \
+            "InnerJoin, "                                                                \
             "RightJoin");                                                                \
     }                                                                                    \
   } break;
           PROCESS_SUPPORTED_TYPES(PROCESS)
 #undef PROCESS
           default: {
-            std::cout << "ConditionedProbeArraysTypedImpl does not support key type as "
+            std::cout << "ConditionedProbeArraysTypedImpl does not support key "
+                         "type as "
                       << key_type_ << std::endl;
           } break;
         }
@@ -544,9 +548,9 @@ class ConditionedProbeKernel::Impl {
       std::shared_ptr<arrow::Array> key_array;
       arrow::ArrayVector projected_keys_outputs;
       /**
-       * if hash_map_type_ == 0, we only need to build a single-column hashArray for key
-       * if hash_map_type_ == 1, we need to both get a single-column hashArray and
-       *projected result of original keys for hashmap
+       * If hash_map_type_ == 0, we only need to build a single-column hashArray
+       * for the key. If hash_map_type_ == 1, we need both a single-column
+       * hashArray and the projected result of original keys for the hashmap.
        **/
       arrow::ArrayVector outputs;
       auto length = in.size() > 0 ? in[0]->length() : 0;
@@ -683,7 +687,8 @@ class ConditionedProbeKernel::Impl {
             } break;
             default: {
               throw std::runtime_error(
-                  "UnsafeInnerProbeFunction Evaluate doesn't support single key type ");
+                  "UnsafeInnerProbeFunction Evaluate doesn't support single "
+                  "key type ");
             } break;
           }
 #undef PROCESS_SUPPORTED_TYPES
@@ -801,7 +806,8 @@ class ConditionedProbeKernel::Impl {
             } break;
             default: {
               throw std::runtime_error(
-                  "UnsafeOuterProbeFunction Evaluate doesn't support single key type ");
+                  "UnsafeOuterProbeFunction Evaluate doesn't support single "
+                  "key type ");
             } break;
           }
 #undef PROCESS_SUPPORTED_TYPES
@@ -927,7 +933,8 @@ class ConditionedProbeKernel::Impl {
             } break;
             default: {
               throw std::runtime_error(
-                  "UnsafeAntiProbeFunction Evaluate doesn't support single key type ");
+                  "UnsafeAntiProbeFunction Evaluate doesn't support single key "
+                  "type ");
             } break;
           }
 #undef PROCESS_SUPPORTED_TYPES
@@ -1044,7 +1051,8 @@ class ConditionedProbeKernel::Impl {
             } break;
             default: {
               throw std::runtime_error(
-                  "UnsafeSemiProbeFunction Evaluate doesn't support single key type ");
+                  "UnsafeSemiProbeFunction Evaluate doesn't support single key "
+                  "type ");
             } break;
           }
 #undef PROCESS_SUPPORTED_TYPES
@@ -1164,7 +1172,8 @@ class ConditionedProbeKernel::Impl {
             } break;
             default: {
               throw std::runtime_error(
-                  "UnsafeSemiProbeFunction Evaluate doesn't support single key type ");
+                  "UnsafeSemiProbeFunction Evaluate doesn't support single key "
+                  "type ");
             } break;
           }
 #undef PROCESS_SUPPORTED_TYPES
@@ -1811,7 +1820,8 @@ class ConditionedProbeKernel::Impl {
       } break;
       default:
         return arrow::Status::NotImplemented(
-            "ConditionedProbeArraysTypedImpl only support join type: InnerJoin, "
+            "ConditionedProbeArraysTypedImpl only support join type: "
+            "InnerJoin, "
             "RightJoin");
     }
     return arrow::Status::OK();
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc
index 1a332e02f..7b0483203 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/expression_codegen_visitor.cc
@@ -85,8 +85,8 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
     }
     codes_str_ = ss.str();
   } else if (func_name.compare("less_than_with_nan") == 0) {
-    real_codes_str_ = "less_than_with_nan(" + child_visitor_list[0]->GetResult()
-        + ", " + child_visitor_list[1]->GetResult() + ")";
+    real_codes_str_ = "less_than_with_nan(" + child_visitor_list[0]->GetResult() + ", " +
+                      child_visitor_list[1]->GetResult() + ")";
     real_validity_str_ = CombineValidity(
         {child_visitor_list[0]->GetPreCheck(), child_visitor_list[1]->GetPreCheck()});
     ss << real_validity_str_ << " && " << real_codes_str_;
@@ -106,8 +106,8 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
     }
     codes_str_ = ss.str();
   } else if (func_name.compare("greater_than_with_nan") == 0) {
-    real_codes_str_ = "greater_than_with_nan(" + child_visitor_list[0]->GetResult()
-        + ", " + child_visitor_list[1]->GetResult() + ")";
+    real_codes_str_ = "greater_than_with_nan(" + child_visitor_list[0]->GetResult() +
+                      ", " + child_visitor_list[1]->GetResult() + ")";
     real_validity_str_ = CombineValidity(
         {child_visitor_list[0]->GetPreCheck(), child_visitor_list[1]->GetPreCheck()});
     ss << real_validity_str_ << " && " << real_codes_str_;
@@ -127,9 +127,9 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
     }
     codes_str_ = ss.str();
   } else if (func_name.compare("less_than_or_equal_to_with_nan") == 0) {
-    real_codes_str_ = "less_than_or_equal_to_with_nan("
-        + child_visitor_list[0]->GetResult()
-        + ", " + child_visitor_list[1]->GetResult() + ")";
+    real_codes_str_ = "less_than_or_equal_to_with_nan(" +
+                      child_visitor_list[0]->GetResult() + ", " +
+                      child_visitor_list[1]->GetResult() + ")";
     real_validity_str_ = CombineValidity(
         {child_visitor_list[0]->GetPreCheck(), child_visitor_list[1]->GetPreCheck()});
     ss << real_validity_str_ << " && " << real_codes_str_;
@@ -149,9 +149,9 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
     }
     codes_str_ = ss.str();
   } else if (func_name.compare("greater_than_or_equal_to_with_nan") == 0) {
-    real_codes_str_ = "greater_than_or_equal_to_with_nan("
-        + child_visitor_list[0]->GetResult()
-        + ", " + child_visitor_list[1]->GetResult() + ")";
+    real_codes_str_ = "greater_than_or_equal_to_with_nan(" +
+                      child_visitor_list[0]->GetResult() + ", " +
+                      child_visitor_list[1]->GetResult() + ")";
     real_validity_str_ = CombineValidity(
         {child_visitor_list[0]->GetPreCheck(), child_visitor_list[1]->GetPreCheck()});
     ss << real_validity_str_ << " && " << real_codes_str_;
@@ -171,8 +171,8 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
     }
     codes_str_ = ss.str();
   } else if (func_name.compare("equal_with_nan") == 0) {
-    real_codes_str_ = "equal_with_nan(" + child_visitor_list[0]->GetResult()
-        + ", " + child_visitor_list[1]->GetResult() + ")";
+    real_codes_str_ = "equal_with_nan(" + child_visitor_list[0]->GetResult() + ", " +
+                      child_visitor_list[1]->GetResult() + ")";
     real_validity_str_ = CombineValidity(
         {child_visitor_list[0]->GetPreCheck(), child_visitor_list[1]->GetPreCheck()});
     ss << real_validity_str_ << " && " << real_codes_str_;
@@ -433,7 +433,7 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
         std::dynamic_pointer_cast<arrow::Decimal128Type>(childNode->return_type());
     fix_ss << ", " << childType->precision() << ", " << childType->scale() << ", "
            << decimal_type->precision() << ", " << decimal_type->scale() << ", &overflow";
-    
+
     std::stringstream prepare_ss;
     prepare_ss << GetCTypeString(node.return_type()) << " " << codes_str_ << ";"
                << std::endl;
@@ -502,6 +502,15 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
     codes_str_ = func_name + "_" + std::to_string(cur_func_id);
     auto validity = codes_str_ + "_validity";
     std::stringstream fix_ss;
+    if (node.return_type()->id() != arrow::Type::DECIMAL) {
+      fix_ss << "round2(" << child_visitor_list[0]->GetResult();
+    } else {
+      auto childNode = node.children().at(0);
+      auto childType =
+          std::dynamic_pointer_cast<arrow::Decimal128Type>(childNode->return_type());
+      fix_ss << "round(" << child_visitor_list[0]->GetResult() << ", "
+             << childType->precision() << ", " << childType->scale() << ", &overflow";
+    }
     if (child_visitor_list.size() > 1) {
       fix_ss << ", " << child_visitor_list[1]->GetResult();
     }
@@ -514,8 +523,13 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
     prepare_ss << "bool " << validity << " = " << child_visitor_list[0]->GetPreCheck()
                << ";" << std::endl;
     prepare_ss << "if (" << validity << ") {" << std::endl;
-    prepare_ss << codes_str_ << " = round2(" << child_visitor_list[0]->GetResult()
-               << fix_ss.str() << ");" << std::endl;
+    if (node.return_type()->id() == arrow::Type::DECIMAL) {
+      prepare_ss << "bool overflow = false;" << std::endl;
+    }
+    prepare_ss << codes_str_ << " = " << fix_ss.str() << ");" << std::endl;
+    if (node.return_type()->id() == arrow::Type::DECIMAL) {
+      prepare_ss << "if (overflow) {\n" << validity << " = false;}" << std::endl;
+    }
     prepare_ss << "}" << std::endl;
 
     prepare_str_ += prepare_ss.str();
@@ -524,14 +538,19 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FunctionNode& node)
   } else if (func_name.compare("abs") == 0) {
     codes_str_ = "abs_" + std::to_string(cur_func_id);
     auto validity = codes_str_ + "_validity";
+    std::stringstream fix_ss;
+    if (node.return_type()->id() != arrow::Type::DECIMAL) {
+      fix_ss << "abs(" << child_visitor_list[0]->GetResult() << ")";
+    } else {
+      fix_ss << child_visitor_list[0]->GetResult() << ".Abs()";
+    }
     std::stringstream prepare_ss;
     prepare_ss << GetCTypeString(node.return_type()) << " " << codes_str_ << ";"
                << std::endl;
     prepare_ss << "bool " << validity << " = " << child_visitor_list[0]->GetPreCheck()
                << ";" << std::endl;
     prepare_ss << "if (" << validity << ") {" << std::endl;
-    prepare_ss << codes_str_ << " = abs(" << child_visitor_list[0]->GetResult() << ");"
-               << std::endl;
+    prepare_ss << codes_str_ << " = " << fix_ss.str() << ";" << std::endl;
     prepare_ss << "}" << std::endl;
 
     for (int i = 0; i < 1; i++) {
@@ -830,7 +849,8 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FieldNode& node) {
     field_type_ = sort_relation;
   } else {
     if (is_smj_) {
-      ///// For inputs are build side as SortRelation, streamed side as input /////
+      ///// Inputs are the build side as a SortRelation and the streamed side
+      ///// as input /////
       if (index == 0) {
         codes_str_ = "sort_relation_" + std::to_string(hash_relation_id_ + index) + "_" +
                      std::to_string(arg_id) + "_value";
@@ -862,7 +882,8 @@ arrow::Status ExpressionCodegenVisitor::Visit(const gandiva::FieldNode& node) {
         field_type_ = right;
       }
     } else {
-      ///// For Inputs are one side HashRelation and other side regular array /////
+      ///// Inputs are a HashRelation on one side and a regular array on the
+      ///// other side /////
       if (field_list_v_.size() == 1) {
         prepare_ss << (*input_list_)[arg_id].first.second;
         if (!is_local_) {
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc
index adf1103e4..8e19adfeb 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc
@@ -410,7 +410,8 @@ class HashAggregateKernel::Impl {
     }
 
     action_list_define_function_ss
-        << "arrow::Status PrepareActionList(std::vector<std::string> action_name_list, "
+        << "arrow::Status PrepareActionList(std::vector<std::string> "
+           "action_name_list, "
            "std::vector<std::shared_ptr<arrow::DataType>> type_list,"
            "std::vector<std::shared_ptr<arrow::DataType>> result_field_list,"
            "std::vector<std::shared_ptr<ActionBase>> *action_list) {"
@@ -513,7 +514,8 @@ class HashAggregateKernel::Impl {
       codegen_ctx->gandiva_projector = std::make_shared<GandivaProjector>(
           ctx_, arrow::schema(result_field_list_), GetGandivaKernel(result_expr_list_));
       codegen_ctx->header_codes.push_back(R"(#include "precompile/gandiva_projector.h")");
-      finish_ss << "RETURN_NOT_OK(gandiva_projector_list_[gp_idx++]->Evaluate(&do_hash_"
+      finish_ss << "RETURN_NOT_OK(gandiva_projector_list_[gp_idx++]->Evaluate(&"
+                   "do_hash_"
                    "aggr_finish_"
                 << level << "_out));" << std::endl;
     }
@@ -674,7 +676,8 @@ class HashAggregateKernel::Impl {
             cols.push_back(in[idx]);
           }
           if (cols.empty()) {
-            // There is a special case, when we need to do no groupby count literal
+            // There is a special case, when we need to do no groupby count
+            // literal
             RETURN_NOT_OK(action->EvaluateCountLiteral(in[0]->length()));
 
           } else {
@@ -851,8 +854,8 @@ class HashAggregateKernel::Impl {
               typed_key_in->null_count() == 0 ? true : !typed_key_in->IsNull(i);
         }
 
-        // for (int n = 0; n < aggr_key.size(); ++n) printf("%0X ", *(aggr_key.data() +
-        // n)); std::cout << std::endl;
+        // for (int n = 0; n < aggr_key.size(); ++n) printf("%0X ",
+        // *(aggr_key.data() + n)); std::cout << std::endl;
 
         // 3. get key from hash_table
         int memo_index = 0;
@@ -983,7 +986,8 @@ class HashAggregateKernel::Impl {
             cols.push_back(in[idx]);
           }
           if (cols.empty()) {
-            // There is a special case, when we need to do no groupby count literal
+            // There is a special case, when we need to do no groupby count
+            // literal
             RETURN_NOT_OK(action->EvaluateCountLiteral(in[0]->length()));
 
           } else {
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc
index 1f00877e1..00276e57e 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_relation_kernel.cc
@@ -75,8 +75,14 @@ class HashRelationKernel::Impl {
           std::dynamic_pointer_cast<gandiva::LiteralNode>(parameter_nodes[0])->holder());
       builder_type_ = std::stoi(builder_type_str);
     }
+    if (builder_type_ == 3) {
+      // This is for using unsafeHashMap while with skipDuplication strategy
+      semi_ = true;
+      builder_type_ = 1;
+    }
     if (builder_type_ == 0) {
-      // builder_type_ == 0 will be abandoned in near future, won't support decimal here.
+      // builder_type_ == 0 will be abandoned in near future, won't support
+      // decimal here.
       if (key_nodes.size() == 1) {
         auto key_node = key_nodes[0];
         std::shared_ptr<TypedNodeVisitor> node_visitor;
@@ -185,10 +191,16 @@ class HashRelationKernel::Impl {
       if (num_total_cached_ > 32) {
         init_key_capacity = pow(2, ceil(log2(num_total_cached_)) + 1);
       }
+      long tmp_capacity = init_key_capacity;
       if (key_size_ != -1) {
-        init_bytes_map_capacity = init_key_capacity * 12;
+        tmp_capacity *= 12;
+      } else {
+        tmp_capacity *= 128;
+      }
+      if (tmp_capacity > INT_MAX) {
+        init_bytes_map_capacity = INT_MAX;
       } else {
-        init_bytes_map_capacity = init_key_capacity * 128;
+        init_bytes_map_capacity = tmp_capacity;
       }
       RETURN_NOT_OK(
           hash_relation_->InitHashTable(init_key_capacity, init_bytes_map_capacity));
@@ -200,8 +212,8 @@ class HashRelationKernel::Impl {
       } else {
         auto project_outputs = keys_cached_[idx];
 
-/* For single field fixed_size key, we simply insert to HashMap without append to unsafe
- * Row */
+/* For single field fixed_size key, we simply insert to HashMap without append
+ * to unsafe Row */
 #define PROCESS_SUPPORTED_TYPES(PROCESS) \
   PROCESS(arrow::BooleanType)            \
   PROCESS(arrow::UInt8Type)              \
@@ -220,11 +232,11 @@ class HashRelationKernel::Impl {
   PROCESS(arrow::Decimal128Type)
         if (project_outputs.size() == 1) {
           switch (project_outputs[0]->type_id()) {
-#define PROCESS(InType)                                                       \
-  case TypeTraits<InType>::type_id: {                                         \
-    using ArrayType = precompile::TypeTraits<InType>::ArrayType;              \
-    auto typed_key_arr = std::make_shared<ArrayType>(project_outputs[0]);     \
-    RETURN_NOT_OK(hash_relation_->AppendKeyColumn(key_array, typed_key_arr)); \
+#define PROCESS(InType)                                                              \
+  case TypeTraits<InType>::type_id: {                                                \
+    using ArrayType = precompile::TypeTraits<InType>::ArrayType;                     \
+    auto typed_key_arr = std::make_shared<ArrayType>(project_outputs[0]);            \
+    RETURN_NOT_OK(hash_relation_->AppendKeyColumn(key_array, typed_key_arr, semi_)); \
   } break;
             PROCESS_SUPPORTED_TYPES(PROCESS)
 #undef PROCESS
@@ -245,7 +257,7 @@ class HashRelationKernel::Impl {
             RETURN_NOT_OK(MakeUnsafeArray(arr->type(), i++, arr, &payload));
             payloads.push_back(payload);
           }
-          RETURN_NOT_OK(hash_relation_->AppendKeyColumn(key_array, payloads));
+          RETURN_NOT_OK(hash_relation_->AppendKeyColumn(key_array, payloads, semi_));
         }
       }
     }
@@ -274,6 +286,7 @@ class HashRelationKernel::Impl {
   std::vector<std::shared_ptr<arrow::Array>> key_hash_cached_;
   uint64_t num_total_cached_ = 0;
   int builder_type_ = 0;
+  bool semi_ = false;
   int key_size_ = -1;  // If key_size_ != 0, key will be stored directly in key_map
 
   class HashRelationResultIterator : public ResultIterator<HashRelation> {
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc
index 71984f3e6..296b204bb 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.cc
@@ -76,8 +76,8 @@ class EncodeArrayTypedImpl : public EncodeArrayKernel::Impl {
   arrow::Status Evaluate(const std::shared_ptr<arrow::Array>& in,
                          std::shared_ptr<arrow::Array>* out) {
     // arrow::Datum input_datum(in);
-    // RETURN_NOT_OK(arrow::compute::Group<InType>(ctx_, input_datum, hash_table_, out));
-    // we should put items into hashmap
+    // RETURN_NOT_OK(arrow::compute::Group<InType>(ctx_, input_datum,
+    // hash_table_, out)); NOTE: we should put items into hashmap
     builder_->Reset();
     auto typed_array = std::dynamic_pointer_cast<ArrayType>(in);
     auto insert_on_found = [this](int32_t i) { builder_->Append(i); };
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h
index 4ede6fd30..e2b6dd972 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/kernels_ext.h
@@ -127,8 +127,7 @@ class WindowAggregateFunctionKernel : public KernalBase {
       std::shared_ptr<arrow::DataType> result_type,
       std::vector<std::shared_ptr<arrow::Int32Array>> accumulated_group_ids,
       std::shared_ptr<ActionFactory> action);
-  static arrow::Status Make(arrow::compute::ExecContext* ctx,
-                            std::string function_name,
+  static arrow::Status Make(arrow::compute::ExecContext* ctx, std::string function_name,
                             std::vector<std::shared_ptr<arrow::DataType>> type_list,
                             std::shared_ptr<arrow::DataType> result_type,
                             std::shared_ptr<KernalBase>* out);
@@ -136,14 +135,17 @@ class WindowAggregateFunctionKernel : public KernalBase {
   arrow::Status Finish(ArrayList* out) override;
 
  private:
-  template<typename ValueType, typename BuilderType, typename ArrayType>
+  template <typename ValueType, typename BuilderType, typename ArrayType>
   arrow::Status Finish0(ArrayList* out, std::shared_ptr<arrow::DataType> data_type);
 
-  template<typename ValueType, typename BuilderType>
-  typename arrow::enable_if_decimal128<ValueType, arrow::Result<std::shared_ptr<BuilderType>>> createBuilder(std::shared_ptr<arrow::DataType> data_type);
+  template <typename ValueType, typename BuilderType>
+  typename arrow::enable_if_decimal128<ValueType,
+                                       arrow::Result<std::shared_ptr<BuilderType>>>
+  createBuilder(std::shared_ptr<arrow::DataType> data_type);
 
-  template<typename ValueType, typename BuilderType>
-  typename arrow::enable_if_number<ValueType, arrow::Result<std::shared_ptr<BuilderType>>> createBuilder(std::shared_ptr<arrow::DataType> data_type);
+  template <typename ValueType, typename BuilderType>
+  typename arrow::enable_if_number<ValueType, arrow::Result<std::shared_ptr<BuilderType>>>
+  createBuilder(std::shared_ptr<arrow::DataType> data_type);
 
   arrow::compute::ExecContext* ctx_;
   std::shared_ptr<ActionFactory> action_;
@@ -175,20 +177,16 @@ class SortArraysToIndicesKernel : public KernalBase {
                             gandiva::NodeVector sort_key_node,
                             std::vector<std::shared_ptr<arrow::Field>> key_field_list,
                             std::vector<bool> sort_directions,
-                            std::vector<bool> nulls_order, 
-                            bool NaN_check,
-                            bool do_codegen,
-                            int result_type,
+                            std::vector<bool> nulls_order, bool NaN_check,
+                            bool do_codegen, int result_type,
                             std::shared_ptr<KernalBase>* out);
   SortArraysToIndicesKernel(arrow::compute::ExecContext* ctx,
                             std::shared_ptr<arrow::Schema> result_schema,
                             gandiva::NodeVector sort_key_node,
                             std::vector<std::shared_ptr<arrow::Field>> key_field_list,
                             std::vector<bool> sort_directions,
-                            std::vector<bool> nulls_order, 
-                            bool NaN_check,
-                            bool do_codegen,
-                            int result_type);
+                            std::vector<bool> nulls_order, bool NaN_check,
+                            bool do_codegen, int result_type);
   arrow::Status Evaluate(const ArrayList& in) override;
   arrow::Status MakeResultIterator(
       std::shared_ptr<arrow::Schema> schema,
@@ -283,8 +281,7 @@ class WindowRankKernel : public KernalBase {
   WindowRankKernel(arrow::compute::ExecContext* ctx,
                    std::vector<std::shared_ptr<arrow::DataType>> type_list,
                    std::shared_ptr<WindowSortKernel::Impl> sorter, bool desc);
-  static arrow::Status Make(arrow::compute::ExecContext* ctx,
-                            std::string function_name,
+  static arrow::Status Make(arrow::compute::ExecContext* ctx, std::string function_name,
                             std::vector<std::shared_ptr<arrow::DataType>> type_list,
                             std::shared_ptr<KernalBase>* out, bool desc);
   arrow::Status Evaluate(const ArrayList& in) override;
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc
index 6a0e4e437..01e19397c 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc
@@ -236,13 +236,13 @@ class ConditionedJoinArraysKernel::Impl {
       std::stringstream ss;
       if (data_type_->id() == arrow::Type::DECIMAL) {
         ss << "builder_" << indice_ << "_ = std::make_shared<"
-                 << GetTypeString(data_type_, "Builder")
-                 << ">(arrow::" << GetArrowTypeDefString(data_type_)
-                 << ", ctx_->memory_pool());" << std::endl;
+           << GetTypeString(data_type_, "Builder")
+           << ">(arrow::" << GetArrowTypeDefString(data_type_)
+           << ", ctx_->memory_pool());" << std::endl;
       } else {
-      ss << "builder_" << indice_ << "_ = std::make_shared<"
-         << GetTypeString(data_type_, "Builder") << ">(ctx_->memory_pool());"
-         << std::endl;
+        ss << "builder_" << indice_ << "_ = std::make_shared<"
+           << GetTypeString(data_type_, "Builder") << ">(ctx_->memory_pool());"
+           << std::endl;
       }
       return ss.str();
     }
@@ -460,7 +460,6 @@ class ConditionedJoinArraysKernel::Impl {
       ss << "  RETURN_NOT_OK(builder_1_" << i << "_->Append(cached_1_" << i
          << "_->GetView(i)));" << std::endl;
       ss << "}" << std::endl;
-
     }
     std::string shuffle_str;
     if (cond_check) {
@@ -488,7 +487,7 @@ class ConditionedJoinArraysKernel::Impl {
       right_value += "item_content{";
       for (auto i = 0; i < right_key_index_list.size(); i++) {
         right_value += "typed_array_" + std::to_string(i) + "->GetView(i)";
-        if (i != right_key_index_list.size() -1) {
+        if (i != right_key_index_list.size() - 1) {
           right_value += ",";
         }
       }
@@ -521,7 +520,8 @@ class ConditionedJoinArraysKernel::Impl {
     std::stringstream left_valid_ss;
     std::stringstream right_valid_ss;
     for (auto i : left_shuffle_index_list) {
-      left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->null_count()) {" << std::endl;
+      left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->null_count()) {"
+                    << std::endl;
       left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->IsNull(tmp.id)) {"
                     << std::endl;
       left_valid_ss << "  RETURN_NOT_OK(builder_0_" << i << "_->AppendNull());"
@@ -547,9 +547,8 @@ class ConditionedJoinArraysKernel::Impl {
       right_valid_ss << "}" << std::endl;
       right_valid_ss << "} else {" << std::endl;
       right_valid_ss << "  RETURN_NOT_OK(builder_1_" << i << "_->Append(cached_1_" << i
-         << "_->GetView(i)));" << std::endl;
+                     << "_->GetView(i)));" << std::endl;
       right_valid_ss << "}" << std::endl;
-
     }
     std::string shuffle_str;
     if (cond_check) {
@@ -573,7 +572,7 @@ class ConditionedJoinArraysKernel::Impl {
       right_value += "item_content{";
       for (auto i = 0; i < right_key_index_list.size(); i++) {
         right_value += "typed_array_" + std::to_string(i) + "->GetView(i)";
-        if (i != right_key_index_list.size() -1) {
+        if (i != right_key_index_list.size() - 1) {
           right_value += ",";
         }
       }
@@ -628,15 +627,16 @@ class ConditionedJoinArraysKernel::Impl {
   )";
   }
   std::string GetFullOuterJoin(bool cond_check,
-                           const std::vector<int>& left_shuffle_index_list,
-                           const std::vector<int>& right_shuffle_index_list,
-                           const std::vector<int>& right_key_index_list) {
+                               const std::vector<int>& left_shuffle_index_list,
+                               const std::vector<int>& right_shuffle_index_list,
+                               const std::vector<int>& right_key_index_list) {
     std::stringstream left_null_ss;
     std::stringstream right_null_ss;
     std::stringstream left_valid_ss;
     std::stringstream right_valid_ss;
     for (auto i : left_shuffle_index_list) {
-      left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->null_count()) {" << std::endl;
+      left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->null_count()) {"
+                    << std::endl;
       left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->IsNull(tmp.id)) {"
                     << std::endl;
       left_valid_ss << "  RETURN_NOT_OK(builder_0_" << i << "_->AppendNull());"
@@ -662,10 +662,10 @@ class ConditionedJoinArraysKernel::Impl {
       right_valid_ss << "}" << std::endl;
       right_valid_ss << "} else {" << std::endl;
       right_valid_ss << "  RETURN_NOT_OK(builder_1_" << i << "_->Append(cached_1_" << i
-         << "_->GetView(i)));" << std::endl;
+                     << "_->GetView(i)));" << std::endl;
       right_valid_ss << "}" << std::endl;
-      right_null_ss << "RETURN_NOT_OK(builder_1_" << i << "_->AppendNull());" << std::endl;
-
+      right_null_ss << "RETURN_NOT_OK(builder_1_" << i << "_->AppendNull());"
+                    << std::endl;
     }
     std::string shuffle_str;
     if (cond_check) {
@@ -689,7 +689,7 @@ class ConditionedJoinArraysKernel::Impl {
       right_value += "item_content{";
       for (auto i = 0; i < right_key_index_list.size(); i++) {
         right_value += "typed_array_" + std::to_string(i) + "->GetView(i)";
-        if (i != right_key_index_list.size() -1) {
+        if (i != right_key_index_list.size() - 1) {
           right_value += ",";
         }
       }
@@ -754,9 +754,8 @@ class ConditionedJoinArraysKernel::Impl {
       right_valid_ss << "}" << std::endl;
       right_valid_ss << "} else {" << std::endl;
       right_valid_ss << "  RETURN_NOT_OK(builder_1_" << i << "_->Append(cached_1_" << i
-         << "_->GetView(i)));" << std::endl;
+                     << "_->GetView(i)));" << std::endl;
       right_valid_ss << "}" << std::endl;
-
     }
     std::string shuffle_str;
     if (cond_check) {
@@ -852,10 +851,10 @@ class ConditionedJoinArraysKernel::Impl {
     }
     std::string right_value;
     if (right_key_index_list.size() > 1) {
-            right_value += "item_content{";
+      right_value += "item_content{";
       for (auto i = 0; i < right_key_index_list.size(); i++) {
         right_value += "typed_array_" + std::to_string(i) + "->GetView(i)";
-        if (i != right_key_index_list.size() -1) {
+        if (i != right_key_index_list.size() - 1) {
           right_value += ",";
         }
       }
@@ -864,7 +863,8 @@ class ConditionedJoinArraysKernel::Impl {
       right_value = "typed_array_0->GetView(i)";
     }
     return R"(
-      auto right_content = )" + right_value + R"(;
+      auto right_content = )" +
+           right_value + R"(;
              if (!typed_array_0->IsNull(i)) {
   while (left_it->hasnext() && left_it->value() < right_content) {
     left_it->next();
@@ -892,9 +892,9 @@ class ConditionedJoinArraysKernel::Impl {
     std::stringstream left_valid_ss;
     std::stringstream right_valid_ss;
 
-    right_exist_ss
-        << "const bool exist = true; RETURN_NOT_OK(builder_1_exists_->Append(exist));"
-        << std::endl;
+    right_exist_ss << "const bool exist = true; "
+                      "RETURN_NOT_OK(builder_1_exists_->Append(exist));"
+                   << std::endl;
     right_not_exist_ss << "const bool not_exist = false; "
                           "RETURN_NOT_OK(builder_1_exists_->Append(not_exist));"
                        << std::endl;
@@ -921,10 +921,10 @@ class ConditionedJoinArraysKernel::Impl {
     }
     std::string right_value;
     if (right_key_index_list.size() > 1) {
-            right_value += "item_content{";
+      right_value += "item_content{";
       for (auto i = 0; i < right_key_index_list.size(); i++) {
         right_value += "typed_array_" + std::to_string(i) + "->GetView(i)";
-        if (i != right_key_index_list.size() -1) {
+        if (i != right_key_index_list.size() - 1) {
           right_value += ",";
         }
       }
@@ -934,7 +934,8 @@ class ConditionedJoinArraysKernel::Impl {
     }
     return R"(
         // existence join
-        auto right_content = )" + right_value + R"(;
+        auto right_content = )" +
+           right_value + R"(;
         if (!typed_array_0->IsNull(i)) {
           while (left_it->hasnext() && left_it->value() < right_content) {
             left_it->next();
@@ -953,7 +954,8 @@ class ConditionedJoinArraysKernel::Impl {
             if (last_match_idx == i) {
             continue;
             }
-            )" + right_valid_ss.str() + right_not_exist_ss.str() + R"(
+            )" +
+           right_valid_ss.str() + right_not_exist_ss.str() + R"(
             out_length += 1;
           }
           if (!left_it->hasnext()) {
@@ -974,14 +976,15 @@ class ConditionedJoinArraysKernel::Impl {
                             right_key_index_list);
       } break;
       case 1: { /*Outer Join*/
-        return GetOuterJoin(cond_check, left_shuffle_index_list,
-                            right_shuffle_index_list, right_key_index_list);
+        return GetOuterJoin(cond_check, left_shuffle_index_list, right_shuffle_index_list,
+                            right_key_index_list);
       } break;
       case 2: { /*Anti Join*/
         return GetAntiJoin(cond_check, left_shuffle_index_list, right_shuffle_index_list);
       } break;
       case 3: { /*Semi Join*/
-        return GetSemiJoin(cond_check, left_shuffle_index_list, right_shuffle_index_list, right_key_index_list);
+        return GetSemiJoin(cond_check, left_shuffle_index_list, right_shuffle_index_list,
+                           right_key_index_list);
       } break;
       case 4: { /*Existence Join*/
         return GetExistenceJoin(cond_check, left_shuffle_index_list,
@@ -992,7 +995,8 @@ class ConditionedJoinArraysKernel::Impl {
                                 right_shuffle_index_list, right_key_index_list);
       } break;
       default:
-        std::cout << "ConditionedProbeArraysTypedImpl only support join type: InnerJoin, "
+        std::cout << "ConditionedProbeArraysTypedImpl only support join type: "
+                     "InnerJoin, "
                      "RightJoin"
                   << std::endl;
         throw;
@@ -1100,8 +1104,7 @@ class ConditionedJoinArraysKernel::Impl {
     std::string tuple_str;
     if (multiple_cols) {
       for (int i = 0; i < size; i++) {
-        std::string local_tuple =
-            "typed_array_" + std::to_string(i) + ",";
+        std::string local_tuple = "typed_array_" + std::to_string(i) + ",";
         tuple_str += local_tuple;
       }
     } else {
@@ -1124,7 +1127,6 @@ class ConditionedJoinArraysKernel::Impl {
       tuple_str.erase(tuple_str.end() - 1, tuple_str.end());
       ss << std::endl << "return std::forward_as_tuple(" + tuple_str + ");" << std::endl;
     } else {
-
       ss << std::endl << "return it->GetView(segment_len);" << std::endl;
     }
     return ss.str();
@@ -1155,7 +1157,8 @@ class ConditionedJoinArraysKernel::Impl {
       ss << "auto concat_kernel_arr_list = {" << evaluate_encode_join_key_str << "};"
          << std::endl;
       ss << "std::shared_ptr<arrow::Array> hash_in;" << std::endl;
-      ss << "RETURN_NOT_OK(hash_kernel_->Evaluate(concat_kernel_arr_list, &hash_in));"
+      ss << "RETURN_NOT_OK(hash_kernel_->Evaluate(concat_kernel_arr_list, "
+            "&hash_in));"
          << std::endl;
       ss << "auto typed_array = std::make_shared<Int32Array>(hash_in);" << std::endl;
     } else {
@@ -1189,10 +1192,12 @@ class ConditionedJoinArraysKernel::Impl {
       item_content_str = "nonstd::sv_lite::string_view";
     }
     list_tiem_str = R"(
-typedef  std::shared_ptr<)" + hash_map_type_str +
+typedef  std::shared_ptr<)" +
+                    hash_map_type_str +
                     R"(> list_item;
-typedef )" + item_content_str + " item_content;";
-    
+typedef )" + item_content_str +
+                    " item_content;";
+
     std::vector<std::string> tuple_types;
     std::vector<std::string> content_tuple_types;
 
@@ -1201,7 +1206,8 @@ typedef )" + item_content_str + " item_content;";
         #include <tuple>)";
 
       for (auto& key : left_key_index_list) {
-        tuple_types.push_back("std::shared_ptr<" + GetTypeString(left_field_list[key]->type(), "Array") + ">");
+        tuple_types.push_back("std::shared_ptr<" +
+                              GetTypeString(left_field_list[key]->type(), "Array") + ">");
         content_tuple_types.push_back(GetCTypeString(left_field_list[key]->type()));
       }
 
@@ -1212,11 +1218,11 @@ typedef )" + item_content_str + " item_content;";
       }
       // remove the ending ','
       tuple_define_str.erase(tuple_define_str.end() - 1, tuple_define_str.end());
-      
+
       std::string content_define_str = "std::tuple<";
       for (auto type : content_tuple_types) {
         if (type == "std::string") {
-           type = "nonstd::sv_lite::string_view";
+          type = "nonstd::sv_lite::string_view";
         }
 
         content_define_str += type;
@@ -1228,7 +1234,8 @@ typedef )" + item_content_str + " item_content;";
       list_tiem_str += R"(
         typedef )" + tuple_define_str +
                        R"(> list_item;
-        typedef )" + content_define_str + "> item_content;";
+        typedef )" + content_define_str +
+                       "> item_content;";
     } else {
       tuple_types.push_back(hash_map_type_str);
     }
@@ -1239,7 +1246,7 @@ typedef )" + item_content_str + " item_content;";
     // TODO: fix multi columns case
     std::string condition_check_str;
     if (func_node) {
-      //TODO: move to use new API
+      // TODO: move to use new API
       condition_check_str =
           GetConditionCheckFunc(func_node, left_field_list, right_field_list,
                                 &left_cond_index_list, &right_cond_index_list);
@@ -1297,13 +1304,15 @@ typedef )" + item_content_str + " item_content;";
     auto make_tuple_str = GetTupleStr(multiple_cols, left_key_index_list.size());
     auto make_idarray_str = GetIdArrayStr(cond_check, join_type);
     auto make_list_str = GetListStr(multiple_cols, left_key_index_list.size());
-    auto make_list_content_str = GetListContentStr(multiple_cols, left_key_index_list.size());
+    auto make_list_content_str =
+        GetListContentStr(multiple_cols, left_key_index_list.size());
 
     return BaseCodes() + R"(
+#include <numeric>
+
 #include "codegen/arrow_compute/ext/array_item_index.h"
 #include "precompile/builder.h"
 #include "precompile/gandiva.h"
-#include <numeric>
 using namespace sparkcolumnarplugin::precompile;
 )" + hash_map_include_str +
            R"(
@@ -1333,7 +1342,7 @@ void setpos(int64_t cur_idx, int64_t cur_segment_len, int64_t cur_passed_len) {
 
 item_content value() {
   )" + make_list_content_str +
-R"(}
+           R"(}
 
 bool hasnext() {
  return (passed_len <= total_len_-1);
@@ -1369,8 +1378,7 @@ class TypedProberImpl : public CodeGenBase {
 
   arrow::Status Evaluate(const ArrayList& in) override {
     )" + evaluate_cache_insert_str +
-           evaluate_get_typed_array_str +
-           make_list_str +
+           evaluate_get_typed_array_str + make_list_str +
            R"(
 
     idx_to_arrarid_.push_back(typed_array_0->length());
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc
index 8373315a0..2bb7b5600 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/probe_kernel.cc
@@ -604,7 +604,8 @@ class ConditionedProbeArraysKernel::Impl {
     std::stringstream left_valid_ss;
     std::stringstream right_valid_ss;
     for (auto i : left_shuffle_index_list) {
-      left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->null_count()) {" << std::endl;
+      left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->null_count()) {"
+                    << std::endl;
       left_valid_ss << "if (cached_0_" << i << "_[tmp.array_id]->IsNull(tmp.id)) {"
                     << std::endl;
       left_valid_ss << "  RETURN_NOT_OK(builder_0_" << i << "_->AppendNull());"
@@ -630,7 +631,7 @@ class ConditionedProbeArraysKernel::Impl {
       right_valid_ss << "}" << std::endl;
       right_valid_ss << "} else {" << std::endl;
       right_valid_ss << "  RETURN_NOT_OK(builder_1_" << i << "_->Append(cached_1_" << i
-         << "_->GetView(i)));" << std::endl;
+                     << "_->GetView(i)));" << std::endl;
       right_valid_ss << "}" << std::endl;
     }
     std::string shuffle_str;
@@ -687,7 +688,7 @@ class ConditionedProbeArraysKernel::Impl {
       right_valid_ss << "}" << std::endl;
       right_valid_ss << "} else {" << std::endl;
       right_valid_ss << "  RETURN_NOT_OK(builder_1_" << i << "_->Append(cached_1_" << i
-         << "_->GetView(i)));" << std::endl;
+                     << "_->GetView(i)));" << std::endl;
       right_valid_ss << "}" << std::endl;
     }
     std::string shuffle_str;
@@ -782,9 +783,9 @@ class ConditionedProbeArraysKernel::Impl {
     std::stringstream right_valid_ss;
     auto right_size = right_shuffle_index_list.size();
 
-    right_exist_ss
-        << "const bool exist = true; RETURN_NOT_OK(builder_1_exists_->Append(exist));"
-        << std::endl;
+    right_exist_ss << "const bool exist = true; "
+                      "RETURN_NOT_OK(builder_1_exists_->Append(exist));"
+                   << std::endl;
     right_not_exist_ss << "const bool not_exist = false; "
                           "RETURN_NOT_OK(builder_1_exists_->Append(not_exist));"
                        << std::endl;
@@ -854,7 +855,8 @@ class ConditionedProbeArraysKernel::Impl {
                                 right_shuffle_index_list);
       } break;
       default:
-        std::cout << "ConditionedProbeArraysTypedImpl only support join type: InnerJoin, "
+        std::cout << "ConditionedProbeArraysTypedImpl only support join type: "
+                     "InnerJoin, "
                      "RightJoin"
                   << std::endl;
         throw;
@@ -959,7 +961,8 @@ class ConditionedProbeArraysKernel::Impl {
       ss << "auto concat_kernel_arr_list = {" << evaluate_encode_join_key_str << "};"
          << std::endl;
       ss << "std::shared_ptr<arrow::Array> hash_in;" << std::endl;
-      ss << "RETURN_NOT_OK(hash_kernel_->Evaluate(concat_kernel_arr_list, &hash_in));"
+      ss << "RETURN_NOT_OK(hash_kernel_->Evaluate(concat_kernel_arr_list, "
+            "&hash_in));"
          << std::endl;
       ss << "auto typed_array = std::make_shared<Int64Array>(hash_in);" << std::endl;
     } else {
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc
index c6ba32e6f..98f89ab2b 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc
@@ -16,20 +16,20 @@
  */
 
 #include <arrow/array/concatenate.h>
-#include <arrow/util/bitmap_ops.h>
 #include <arrow/compute/api.h>
 #include <arrow/type.h>
 #include <arrow/type_fwd.h>
 #include <arrow/type_traits.h>
+#include <arrow/util/bitmap_ops.h>
 #include <gandiva/projector.h>
 
 #include <algorithm>
+#include <cmath>
 #include <cstdint>
 #include <iostream>
 #include <memory>
 #include <numeric>
 #include <vector>
-#include <cmath>
 
 #include "array_appender.h"
 #include "cmp_function.h"
@@ -48,24 +48,24 @@
 /**
                  The Overall Implementation of Sort Kernel
  * In general, there are four kenels to use when sorting for different data.
-   SortArraysToIndicesKernel::Impl is the base class, and other three kernels, including 
-   SortInplaceKernel, SortOnekeyKernel and SortMultiplekeyKernel, extend it.
+   SortArraysToIndicesKernel::Impl is the base class, and other three kernels,
+including SortInplaceKernel, SortOnekeyKernel and SortMultiplekeyKernel, extend
+it.
  * Usage:
-   SortInplaceKernel is used when sorting for one non-string and non-bool col without 
-   payload.
-   SortOnekeyKernel is used when sorting for single key with payload, and one string 
-   or bool col without payload.
-   SortMultiplekeyKernel is used when sorting for multiple keys and codegen is disabled.
-   SortArraysToIndicesKernel::Impl is used when sorting for multiple keys and codegen 
-   is enabled.
- * In these kernels, usually ska_sort is used for asc direciton, and std sort is used 
-   for desc direciton. Timsort is used in multiple-key sort.
+   SortInplaceKernel is used when sorting for one non-string and non-bool col
+without payload. SortOnekeyKernel is used when sorting for single key with
+payload, and one string or bool col without payload. SortMultiplekeyKernel is
+used when sorting for multiple keys and codegen is disabled.
+   SortArraysToIndicesKernel::Impl is used when sorting for multiple keys and
+codegen is enabled.
+ * In these kernels, usually ska_sort is used for asc direction, and std sort is
+used for desc direction. Timsort is used in multiple-key sort.
  * Before sorting, projection and partition can be conducted.
-   If projection is required, it is completed before sorting, and the projected cols are 
-   used to do comparison.
-   If partition is required, null, NaN(for double and float only) and valid value are
-   partitioned before sorting.
-   FIXME: 1. datatype change after projection is not supported in SortInplaceKernel.
+   If projection is required, it is completed before sorting, and the projected
+cols are used to do comparison. If partition is required, null, NaN(for double
+and float only) and valid value are partitioned before sorting.
+   FIXME: 1. datatype change after projection is not supported in
+SortInplaceKernel.
 **/
 
 namespace sparkcolumnarplugin {
@@ -76,22 +76,20 @@ using ArrayList = std::vector<std::shared_ptr<arrow::Array>>;
 using namespace sparkcolumnarplugin::precompile;
 
 template <typename T>
-using is_number_bool_date = std::integral_constant<bool, 
-    arrow::is_number_type<T>::value || arrow::is_boolean_type<T>::value || 
-    arrow::is_date_type<T>::value>;
+using is_number_bool_date =
+    std::integral_constant<bool, arrow::is_number_type<T>::value ||
+                                     arrow::is_boolean_type<T>::value ||
+                                     arrow::is_date_type<T>::value>;
 
 ///////////////  SortArraysToIndices  ////////////////
 class SortArraysToIndicesKernel::Impl {
  public:
   Impl() {}
-  Impl(arrow::compute::ExecContext* ctx,
-       std::shared_ptr<arrow::Schema> result_schema,
+  Impl(arrow::compute::ExecContext* ctx, std::shared_ptr<arrow::Schema> result_schema,
        std::shared_ptr<gandiva::Projector> key_projector,
        std::vector<std::shared_ptr<arrow::DataType>> projected_types,
        std::vector<std::shared_ptr<arrow::Field>> key_field_list,
-       std::vector<bool> sort_directions, 
-       std::vector<bool> nulls_order,
-       bool NaN_check)
+       std::vector<bool> sort_directions, std::vector<bool> nulls_order, bool NaN_check)
       : ctx_(ctx),
         result_schema_(result_schema),
         key_projector_(key_projector),
@@ -100,9 +98,9 @@ class SortArraysToIndicesKernel::Impl {
         nulls_order_(nulls_order),
         projected_types_(projected_types),
         NaN_check_(NaN_check) {
-    #ifdef DEBUG
-        std::cout << "use SortArraysToIndicesKernel::Impl" << std::endl;
-    #endif
+#ifdef DEBUG
+    std::cout << "use SortArraysToIndicesKernel::Impl" << std::endl;
+#endif
     for (auto field : key_field_list) {
       auto indices = result_schema->GetAllFieldIndices(field->name());
       if (indices.size() != 1) {
@@ -175,7 +173,7 @@ class SortArraysToIndicesKernel::Impl {
       cached_.resize(col_num_ + 1);
     }
     for (int i = 0; i < col_num_; i++) {
-        cached_[i].push_back(in[i]);
+      cached_[i].push_back(in[i]);
     }
     if (!key_projector_) {
       ArrayList key_cols;
@@ -184,7 +182,7 @@ class SortArraysToIndicesKernel::Impl {
       }
       sorter_->Evaluate(key_cols);
     } else {
-      std::vector<std::shared_ptr<arrow::Array>> projected_batch; 
+      std::vector<std::shared_ptr<arrow::Array>> projected_batch;
       // do projection here, and the projected arrays are used for comparison
       auto length = in.size() > 0 ? in[0]->length() : 0;
       auto in_batch = arrow::RecordBatch::Make(result_schema_, length, in);
@@ -262,15 +260,15 @@ class SortArraysToIndicesKernel::Impl {
     std::vector<std::shared_ptr<TypedSorterCodeGenImpl>> key_typed_codegen_list;
     if (key_projector_) {
       for (auto field : projected_field_list_) {
-        auto codegen = std::make_shared<TypedSorterCodeGenImpl>(
-            std::to_string(indice), field->type());
+        auto codegen = std::make_shared<TypedSorterCodeGenImpl>(std::to_string(indice),
+                                                                field->type());
         key_typed_codegen_list.push_back(codegen);
         indice++;
       }
     } else {
       for (auto field : key_field_list_) {
-        auto codegen = std::make_shared<TypedSorterCodeGenImpl>(
-            std::to_string(indice), field->type());
+        auto codegen = std::make_shared<TypedSorterCodeGenImpl>(std::to_string(indice),
+                                                                field->type());
         key_typed_codegen_list.push_back(codegen);
         indice++;
       }
@@ -320,7 +318,7 @@ class TypedSorterImpl : public CodeGenBase {
     // we should support nulls first and nulls last here
     // we should also support desc and asc here
     )" + comp_func_str +
-         comp_func_str_without_null +
+           comp_func_str_without_null +
            R"(
     // initiate buffer for all arrays
     std::shared_ptr<arrow::Buffer> indices_buf;
@@ -371,10 +369,11 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
   std::string GetCachedInsert() {
     std::stringstream ss;
     for (int i = 0; i < key_index_list_.size(); i++) {
-      ss << "cached_" << i << "_.push_back(std::make_shared<ArrayType_" << i 
-         << ">(in[" << i << "]));\n"
+      ss << "cached_" << i << "_.push_back(std::make_shared<ArrayType_" << i << ">(in["
+         << i
+         << "]));\n"
          // update has_null_
-         << "if (!has_null_ && cached_" << i << "_[cached_" << i 
+         << "if (!has_null_ && cached_" << i << "_[cached_" << i
          << "_.size() - 1]->null_count() > 0) {"
          << "has_null_ = true;}" << std::endl;
     }
@@ -384,11 +383,12 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
   std::string GetSortFunction() {
     std::stringstream ss;
     ss << "if (has_null_) {\n"
-       << "gfx::timsort(indices_begin, indices_begin + items_total_, comp);} else {\n" 
-       << "gfx::timsort(indices_begin, indices_begin + items_total_, comp_without_null);}"
+       << "gfx::timsort(indices_begin, indices_begin + items_total_, comp);} "
+          "else {\n"
+       << "gfx::timsort(indices_begin, indices_begin + items_total_, "
+          "comp_without_null);}"
        << std::endl;
     return ss.str();
-
   }
 
   std::string GetCompFunction(bool has_null) {
@@ -400,14 +400,15 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
       projected = false;
     }
     if (has_null) {
-      ss << "auto comp = [this](const ArrayItemIndexS& x, const ArrayItemIndexS& y) {"
-         << GetCompFunction_(0, projected, key_field_list_,
-                             projected_types_, sort_directions_, nulls_order_);
+      ss << "auto comp = [this](const ArrayItemIndexS& x, const "
+            "ArrayItemIndexS& y) {"
+         << GetCompFunction_(0, projected, key_field_list_, projected_types_,
+                             sort_directions_, nulls_order_);
     } else {
       ss << "auto comp_without_null = "
          << "[this](const ArrayItemIndexS& x, const ArrayItemIndexS& y) {"
-         << GetCompFunction_Without_Null_(0, projected, key_field_list_,
-                                          projected_types_, sort_directions_);
+         << GetCompFunction_Without_Null_(0, projected, key_field_list_, projected_types_,
+                                          sort_directions_);
     }
     ss << "};\n";
     return ss.str();
@@ -441,11 +442,11 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
         array + std::to_string(cur_key_idx) + "_[y.array_id]->GetString(y.id)";
     auto is_x_null = array + std::to_string(cur_key_idx) + "_[x.array_id]->IsNull(x.id)";
     auto is_y_null = array + std::to_string(cur_key_idx) + "_[y.array_id]->IsNull(y.id)";
-    auto x_null_count = 
+    auto x_null_count =
         array + std::to_string(cur_key_idx) + "_[x.array_id]->null_count() > 0";
-    auto y_null_count = 
+    auto y_null_count =
         array + std::to_string(cur_key_idx) + "_[y.array_id]->null_count() > 0";
-    auto x_null = "(" + x_null_count + " && " + is_x_null + " )";  
+    auto x_null = "(" + x_null_count + " && " + is_x_null + " )";
     auto y_null = "(" + y_null_count + " && " + is_y_null + " )";
     auto is_x_nan = "std::isnan(" + x_num_value + ")";
     auto is_y_nan = "std::isnan(" + y_num_value + ")";
@@ -467,7 +468,8 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
     } else {
       ss << "return true;\n}";
     }
-    // If datatype is floating, we need to do partition for NaN if NaN check is enabled
+    // If datatype is floating, we need to do partition for NaN if NaN check is
+    // enabled
     if (data_type->id() == arrow::Type::DOUBLE || data_type->id() == arrow::Type::FLOAT) {
       if (NaN_check_) {
         ss << "else if (" << is_x_nan << ") {\n";
@@ -516,16 +518,15 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
       if (NaN_check_ && (data_type->id() == arrow::Type::DOUBLE ||
                          data_type->id() == arrow::Type::FLOAT)) {
         // need to check NaN
-        ss << "if ((" << x_null << " && " << y_null << ") || (" << is_x_nan
-           << " && " << is_y_nan << ") || (" << x_num_value << " == " << y_num_value
-           << ")) {";
+        ss << "if ((" << x_null << " && " << y_null << ") || (" << is_x_nan << " && "
+           << is_y_nan << ") || (" << x_num_value << " == " << y_num_value << ")) {";
       } else {
         ss << "if ((" << x_null << " && " << y_null << ") || (" << x_num_value
            << " == " << y_num_value << ")) {";
       }
     }
-    ss << GetCompFunction_(cur_key_idx + 1, projected, key_field_list, 
-                           projected_types, sort_directions, nulls_order)
+    ss << GetCompFunction_(cur_key_idx + 1, projected, key_field_list, projected_types,
+                           sort_directions, nulls_order)
        << "} else { " << comp_str << "}";
     return ss.str();
   }
@@ -560,8 +561,9 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
 
     // Multiple keys sorting w/ nulls first/last is supported.
     std::stringstream ss;
-    // If datatype is floating, we need to do partition for NaN if NaN check is enabled
-    if (NaN_check_ && (data_type->id() == arrow::Type::DOUBLE || 
+    // If datatype is floating, we need to do partition for NaN if NaN check is
+    // enabled
+    if (NaN_check_ && (data_type->id() == arrow::Type::DOUBLE ||
                        data_type->id() == arrow::Type::FLOAT)) {
       ss << "if (" << is_x_nan << ") {\n";
       if (asc) {
@@ -594,9 +596,9 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
         ss << "return " << x_num_value << " > " << y_num_value << ";\n";
       }
     }
-    if (NaN_check_ && (data_type->id() == arrow::Type::DOUBLE || 
+    if (NaN_check_ && (data_type->id() == arrow::Type::DOUBLE ||
                        data_type->id() == arrow::Type::FLOAT)) {
-      ss << "}" << std::endl; 
+      ss << "}" << std::endl;
     }
     comp_str = ss.str();
     if ((cur_key_idx + 1) == sort_directions.size()) {
@@ -610,13 +612,13 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
       if (NaN_check_ && (data_type->id() == arrow::Type::DOUBLE ||
                          data_type->id() == arrow::Type::FLOAT)) {
         // need to check NaN
-        ss << "if ((" << is_x_nan << " && " << is_y_nan << ") || (" 
-           << x_num_value << " == " << y_num_value << ")) {";
+        ss << "if ((" << is_x_nan << " && " << is_y_nan << ") || (" << x_num_value
+           << " == " << y_num_value << ")) {";
       } else {
         ss << "if (" << x_num_value << " == " << y_num_value << ") {";
       }
     }
-    ss << GetCompFunction_Without_Null_(cur_key_idx + 1, projected, key_field_list, 
+    ss << GetCompFunction_Without_Null_(cur_key_idx + 1, projected, key_field_list,
                                         projected_types, sort_directions)
        << "} else { " << comp_str << "}";
     return ss.str();
@@ -639,10 +641,11 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
       *out = sort_relation_;
       return arrow::Status::OK();
     }
+
    private:
     std::shared_ptr<SortRelation> sort_relation_;
   };
-  
+
   class SorterResultIterator : public ResultIterator<arrow::RecordBatch> {
    public:
     SorterResultIterator(arrow::compute::ExecContext* ctx,
@@ -661,7 +664,7 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
       for (int i = 0; i < col_num_; i++) {
         auto field = schema->field(i);
         std::shared_ptr<AppenderBase> appender;
-        THROW_NOT_OK(MakeAppender(ctx_, field->type(), appender_type, &appender));
+        THROW_NOT_OK(MakeUnsafeAppender(ctx_, field->type(), appender_type, &appender));
         appender_list_.push_back(appender);
       }
       for (int i = 0; i < col_num_; i++) {
@@ -674,7 +677,7 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
       }
       batch_size_ = GetBatchSize();
     }
-    ~SorterResultIterator(){}
+    ~SorterResultIterator() {}
 
     std::string ToString() override { return "SortArraysToIndicesResultIterator"; }
 
@@ -689,11 +692,13 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
       auto length = (total_length_ - offset_) > batch_size_ ? batch_size_
                                                             : (total_length_ - offset_);
       uint64_t count = 0;
-      while (count < length) {
-        auto item = indices_begin_ + offset_ + count++;
-        for (int i = 0; i < col_num_; i++) {
+      for (int i = 0; i < col_num_; i++) {
+        RETURN_NOT_OK(appender_list_[i]->Reserve(length));
+        while (count < length) {
+          auto item = indices_begin_ + offset_ + count++;
           RETURN_NOT_OK(appender_list_[i]->Append(item->array_id, item->id));
         }
+        count = 0;
       }
       offset_ += length;
       ArrayList arrays;
@@ -785,7 +790,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
   // This function is used for non-float and non-double data without null value.
   template <typename TYPE>
   auto SortNoNull(TYPE* indices_begin, TYPE* indices_end) ->
-      typename std::enable_if_t<!std::is_floating_point<TYPE>::value && 
+      typename std::enable_if_t<!std::is_floating_point<TYPE>::value &&
                                 !std::is_same<TYPE, arrow::Decimal128>::value> {
     if (asc_) {
       ska_sort(indices_begin, indices_end);
@@ -795,7 +800,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
     }
   }
 
-    // This function is used for non-float and non-double data without null value.
+  // This function is used for Decimal128 data without null value.
   template <typename TYPE>
   auto SortNoNull(TYPE* indices_begin, TYPE* indices_end) ->
       typename std::enable_if_t<std::is_same<TYPE, arrow::Decimal128>::value> {
@@ -881,7 +886,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
   // We should do partition for null.
   template <typename TYPE, typename ArrayType>
   auto Sort(int64_t* indices_begin, int64_t* indices_end, const ArrayType& values) ->
-      typename std::enable_if_t<!std::is_floating_point<TYPE>::value && 
+      typename std::enable_if_t<!std::is_floating_point<TYPE>::value &&
                                 !std::is_same<TYPE, arrow::Decimal128>::value> {
     std::iota(indices_begin, indices_end, 0);
     if (asc_) {
@@ -925,12 +930,14 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
         return values.GetView(left) < values.GetView(right);
       };
       if (nulls_first_) {
-        auto nulls_end = std::partition(indices_begin, indices_end, 
-            [&values](uint64_t ind) { return values.IsNull(ind); });
+        auto nulls_end =
+            std::partition(indices_begin, indices_end,
+                           [&values](uint64_t ind) { return values.IsNull(ind); });
         std::sort(nulls_end, indices_end, comp);
       } else {
-        auto nulls_begin = std::partition(indices_begin, indices_end, 
-            [&values](uint64_t ind) { return !values.IsNull(ind); });
+        auto nulls_begin =
+            std::partition(indices_begin, indices_end,
+                           [&values](uint64_t ind) { return !values.IsNull(ind); });
         std::sort(indices_begin, nulls_begin, comp);
       }
     } else {
@@ -938,12 +945,14 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
         return values.GetView(left) > values.GetView(right);
       };
       if (nulls_first_) {
-        auto nulls_end = std::partition(indices_begin, indices_end,
-            [&values](uint64_t ind) { return values.IsNull(ind); });
+        auto nulls_end =
+            std::partition(indices_begin, indices_end,
+                           [&values](uint64_t ind) { return values.IsNull(ind); });
         std::sort(nulls_end, indices_end, comp);
       } else {
-        auto nulls_begin = std::partition(indices_begin, indices_end,
-            [&values](uint64_t ind) { return !values.IsNull(ind); });
+        auto nulls_begin =
+            std::partition(indices_begin, indices_end,
+                           [&values](uint64_t ind) { return !values.IsNull(ind); });
         std::sort(indices_begin, nulls_begin, comp);
       }
     }
@@ -1020,14 +1029,14 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
     std::string ToString() override { return "SortArraysToIndicesResultIterator"; }
 
     bool HasNext() override {
-      if (total_offset_ >= total_length_) {
+      if (offset_ >= total_length_) {
         return false;
       }
       return true;
     }
 
-    // This class is used to copy a piece of memory from the sorted ArrayData 
-    // to a result array. 
+    // This class is used to copy a piece of memory from the sorted ArrayData
+    // to a result array.
     // It can be used only in sorted data because of null count calculation.
     template <typename KeyType>
     class SliceImpl {
@@ -1046,21 +1055,25 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
         for (auto& data : out_data_.child_data) {
           data = std::make_shared<arrow::ArrayData>();
         }
-        // decide null_count
-        if (null_first) {
-          if ((offset + length) > null_total_) {
-            out_data_.null_count =
-                (null_total_ - offset > 0) ? (null_total_ - offset) : 0;
-          } else {
-            out_data_.null_count = length;
-          }
+        // decide null_count of this sliced array
+        if (null_total == 0) {
+          out_data_.null_count = 0;
         } else {
-          auto valid_total = total_length - null_total_;
-          if ((offset + length) < valid_total) {
-            out_data_.null_count = 0;
+          if (null_first) {
+            if ((offset + length) > null_total_) {
+              out_data_.null_count =
+                  (null_total_ - offset > 0) ? (null_total_ - offset) : 0;
+            } else {
+              out_data_.null_count = length;
+            }
           } else {
-            out_data_.null_count =
-                (offset - valid_total) > 0 ? length : (offset + length - valid_total);
+            auto valid_total = total_length - null_total_;
+            if ((offset + length) < valid_total) {
+              out_data_.null_count = 0;
+            } else {
+              out_data_.null_count =
+                  (offset - valid_total) > 0 ? length : (offset + length - valid_total);
+            }
           }
         }
       }
@@ -1098,7 +1111,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
         bool AllSet() const { return data == nullptr; }
       };
 
-      arrow::Status SliceBuffer(const std::shared_ptr<arrow::Buffer>& buffer, 
+      arrow::Status SliceBuffer(const std::shared_ptr<arrow::Buffer>& buffer,
                                 std::shared_ptr<arrow::Buffer>* out) {
         ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(size * length_, pool_));
         auto out_data = (*out)->mutable_data();
@@ -1107,30 +1120,9 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
         return arrow::Status::OK();
       }
 
-      arrow::Status SliceBitmapImpl(
-          const Bitmap& bitmap, std::shared_ptr<arrow::Buffer>* out) {
-        auto length = bitmap.range.length;
-        auto offset = bitmap.range.offset;
-        ARROW_ASSIGN_OR_RAISE(*out, AllocateBitmap(length, pool_));
-        uint8_t* dst = (*out)->mutable_data();
-
-        int64_t bitmap_offset = 0;
-        if (bitmap.AllSet()) {
-          arrow::BitUtil::SetBitsTo(dst, offset, length, true);
-        } else {
-          arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, bitmap_offset);
-        }
-
-        // finally (if applicable) zero out any trailing bits
-        if (auto preceding_bits = arrow::BitUtil::kPrecedingBitmask[length_ % 8]) {
-          dst[length_ / 8] &= preceding_bits;
-        }
-        return arrow::Status::OK();
-      }
-
       arrow::Status SliceBitmap(const std::shared_ptr<arrow::Buffer>& buffer,
                                 std::shared_ptr<arrow::Buffer>* out) {
-        Range range(size * offset_, size * length_);
+        Range range(offset_, length_);
         Bitmap bitmap = Bitmap(buffer, range);
 
         auto length = bitmap.range.length;
@@ -1138,16 +1130,10 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
         ARROW_ASSIGN_OR_RAISE(*out, AllocateBitmap(length, pool_));
         uint8_t* dst = (*out)->mutable_data();
 
-        int64_t bitmap_offset = 0;
         if (bitmap.AllSet()) {
           arrow::BitUtil::SetBitsTo(dst, offset, length, true);
         } else {
-          arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, bitmap_offset);
-        }
-
-        // finally (if applicable) zero out any trailing bits
-        if (auto preceding_bits = arrow::BitUtil::kPrecedingBitmask[length_ % 8]) {
-          dst[length_ / 8] &= preceding_bits;
+          arrow::internal::CopyBitmap(bitmap.data, offset, length, dst, 0);
         }
         return arrow::Status::OK();
       }
@@ -1162,16 +1148,16 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
     };
 
     arrow::Status Next(std::shared_ptr<arrow::RecordBatch>* out) {
-      auto length = (total_length_ - total_offset_) > batch_size_
-                        ? batch_size_
-                        : (total_length_ - total_offset_);
+      auto length = (total_length_ - offset_) > batch_size_ ? batch_size_
+                                                            : (total_length_ - offset_);
       arrow::ArrayData result_data = *result_arr_->data();
       arrow::ArrayData out_data;
-      SliceImpl<CTYPE>(result_data, ctx_->memory_pool(), length, total_offset_,
-                       nulls_total_, nulls_first_, total_length_).Slice(&out_data);
+      SliceImpl<CTYPE>(result_data, ctx_->memory_pool(), length, offset_, nulls_total_,
+                       nulls_first_, total_length_)
+          .Slice(&out_data);
       std::shared_ptr<arrow::Array> out_0 =
           MakeArray(std::make_shared<arrow::ArrayData>(std::move(out_data)));
-      total_offset_ += length;
+      offset_ += length;
       *out = arrow::RecordBatch::Make(result_schema_, length, {out_0});
       return arrow::Status::OK();
     }
@@ -1180,8 +1166,7 @@ class SortInplaceKernel : public SortArraysToIndicesKernel::Impl {
     using ArrayType_0 = typename arrow::TypeTraits<DATATYPE>::ArrayType;
     using BuilderType_0 = typename arrow::TypeTraits<DATATYPE>::BuilderType;
     std::shared_ptr<arrow::Array> result_arr_;
-    uint64_t total_offset_ = 0;
-    uint64_t valid_offset_ = 0;
+    uint64_t offset_ = 0;
     const uint64_t total_length_;
     const uint64_t nulls_total_;
     std::shared_ptr<arrow::Schema> result_schema_;
@@ -1333,7 +1318,8 @@ class SortOnekeyKernel : public SortArraysToIndicesKernel::Impl {
             }
           } else {
             // values should be partitioned to:
-            // null, null, ..., NaN, NaN, ..., valid-1, valid-2, ..., valid-3, ...
+            // null, null, ..., NaN, NaN, ...,
+            // valid-1, valid-2, ..., valid-3, ...
             if (!std::isnan(cached_key_[array_id]->GetView(i))) {
               (indices_end - indices_i - 1)->array_id = array_id;
               (indices_end - indices_i - 1)->id = i;
@@ -1347,7 +1333,8 @@ class SortOnekeyKernel : public SortArraysToIndicesKernel::Impl {
         } else {
           if (asc_) {
             // values should be partitioned to:
-            // valid-1, valid-2, ..., valid-3, ..., NaN, NaN, ..., null, null, ...
+            // valid-1, valid-2, ..., valid-3, ...,
+            // NaN, NaN, ..., null, null, ...
             if (!std::isnan(cached_key_[array_id]->GetView(i))) {
               (indices_begin + indices_i)->array_id = array_id;
               (indices_begin + indices_i)->id = i;
@@ -1359,7 +1346,8 @@ class SortOnekeyKernel : public SortArraysToIndicesKernel::Impl {
             }
           } else {
             // values should be partitioned to:
-            // NaN, NaN, ..., valid-1, valid-2, ..., valid-3, ..., null, null, ...
+            // NaN, NaN, ..., valid-1, valid-2, ..., valid-3, ...,
+            // null, null, ...
             if (!std::isnan(cached_key_[array_id]->GetView(i))) {
               (indices_end - nulls_total_ - indices_i - 1)->array_id = array_id;
               (indices_end - nulls_total_ - indices_i - 1)->id = i;
@@ -1450,11 +1438,11 @@ class SortOnekeyKernel : public SortArraysToIndicesKernel::Impl {
   }
 
   template <typename T>
-  auto Sort(ArrayItemIndexS* indices_begin, ArrayItemIndexS* indices_end, int64_t num_nan) 
+  auto Sort(ArrayItemIndexS* indices_begin, ArrayItemIndexS* indices_end, int64_t num_nan)
       -> typename std::enable_if_t<arrow::is_decimal_type<T>::value> {
     if (asc_) {
       auto comp = [this](const ArrayItemIndexS& x, const ArrayItemIndexS& y) {
-        return cached_key_[x.array_id]->GetView(x.id) < 
+        return cached_key_[x.array_id]->GetView(x.id) <
                cached_key_[y.array_id]->GetView(y.id);
       };
       if (nulls_first_) {
@@ -1466,7 +1454,7 @@ class SortOnekeyKernel : public SortArraysToIndicesKernel::Impl {
       }
     } else {
       auto comp = [this](const ArrayItemIndexS& x, const ArrayItemIndexS& y) {
-        return cached_key_[x.array_id]->GetView(x.id) > 
+        return cached_key_[x.array_id]->GetView(x.id) >
                cached_key_[y.array_id]->GetView(y.id);
       };
       if (nulls_first_) {
@@ -1527,45 +1515,44 @@ class SortOnekeyKernel : public SortArraysToIndicesKernel::Impl {
 };
 
 ///////////////  SortArraysMultipleKeys  ////////////////
-class SortMultiplekeyKernel  : public SortArraysToIndicesKernel::Impl {
+class SortMultiplekeyKernel : public SortArraysToIndicesKernel::Impl {
  public:
   SortMultiplekeyKernel(arrow::compute::ExecContext* ctx,
                         std::shared_ptr<arrow::Schema> result_schema,
                         std::shared_ptr<gandiva::Projector> key_projector,
                         std::vector<std::shared_ptr<arrow::DataType>> projected_types,
                         std::vector<std::shared_ptr<arrow::Field>> key_field_list,
-                        std::vector<bool> sort_directions, 
-                        std::vector<bool> nulls_order,
+                        std::vector<bool> sort_directions, std::vector<bool> nulls_order,
                         bool NaN_check)
-      : ctx_(ctx), 
-        nulls_order_(nulls_order), 
-        sort_directions_(sort_directions), 
-        result_schema_(result_schema), 
+      : ctx_(ctx),
+        nulls_order_(nulls_order),
+        sort_directions_(sort_directions),
+        result_schema_(result_schema),
         key_projector_(key_projector),
         key_field_list_(key_field_list),
         NaN_check_(NaN_check) {
-      #ifdef DEBUG
-          std::cout << "UseSortMultiplekeyKernel" << std::endl;
-      #endif
-      for (auto field : key_field_list) {
-        auto indices = result_schema->GetAllFieldIndices(field->name());
-        if (indices.size() != 1) {
-          std::cout << "[ERROR] SortArraysToIndicesKernel::Impl can't find key "
-                    << field->ToString() << " from " << result_schema->ToString()
-                    << std::endl;
-          throw;
-        }
-        key_index_list_.push_back(indices[0]);
-      }
-      col_num_ = result_schema->num_fields();
-      int i = 0;
-      for (auto type : projected_types) {
-        auto field = arrow::field(std::to_string(i), type);
-        projected_field_list_.push_back(field);
-        i++;
+#ifdef DEBUG
+    std::cout << "UseSortMultiplekeyKernel" << std::endl;
+#endif
+    for (auto field : key_field_list) {
+      auto indices = result_schema->GetAllFieldIndices(field->name());
+      if (indices.size() != 1) {
+        std::cout << "[ERROR] SortArraysToIndicesKernel::Impl can't find key "
+                  << field->ToString() << " from " << result_schema->ToString()
+                  << std::endl;
+        throw;
       }
+      key_index_list_.push_back(indices[0]);
+    }
+    col_num_ = result_schema->num_fields();
+    int i = 0;
+    for (auto type : projected_types) {
+      auto field = arrow::field(std::to_string(i), type);
+      projected_field_list_.push_back(field);
+      i++;
+    }
   }
-  ~SortMultiplekeyKernel(){}
+  ~SortMultiplekeyKernel() {}
 
   arrow::Status Evaluate(const ArrayList& in) override {
     num_batches_++;
@@ -1573,14 +1560,14 @@ class SortMultiplekeyKernel  : public SortArraysToIndicesKernel::Impl {
       cached_.resize(col_num_ + 1);
     }
     for (int i = 0; i < col_num_; i++) {
-        cached_[i].push_back(in[i]);
+      cached_[i].push_back(in[i]);
     }
     if (key_projector_) {
       int projected_col_num = projected_field_list_.size();
       if (projected_.size() <= projected_col_num) {
         projected_.resize(projected_col_num + 1);
       }
-      std::vector<std::shared_ptr<arrow::Array>> projected_batch; 
+      std::vector<std::shared_ptr<arrow::Array>> projected_batch;
       // do projection here, and the projected arrays are used for comparison
       auto length = in.size() > 0 ? in[0]->length() : 0;
       auto in_batch = arrow::RecordBatch::Make(result_schema_, length, in);
@@ -1596,14 +1583,13 @@ class SortMultiplekeyKernel  : public SortArraysToIndicesKernel::Impl {
     return arrow::Status::OK();
   }
 
-  int compareInternal(int left_array_id, int64_t left_id, int right_array_id, 
+  int compareInternal(int left_array_id, int64_t left_id, int right_array_id,
                       int64_t right_id, int keys_num) {
     int key_idx = 0;
     while (key_idx < keys_num) {
       // In comparison, 1 represents for true, 0 for false, and 2 for equal.
       int cmp_res = 2;
-      cmp_functions_[key_idx](left_array_id, right_array_id, 
-                              left_id, right_id, cmp_res);
+      cmp_functions_[key_idx](left_array_id, right_array_id, left_id, right_id, cmp_res);
       if (cmp_res != 2) {
         return cmp_res;
       }
@@ -1612,10 +1598,10 @@ class SortMultiplekeyKernel  : public SortArraysToIndicesKernel::Impl {
     return 2;
   }
 
-  bool compareRow(int left_array_id, int64_t left_id, int right_array_id, 
+  bool compareRow(int left_array_id, int64_t left_id, int right_array_id,
                   int64_t right_id, int keys_num) {
-    if (compareInternal(left_array_id, left_id, right_array_id, 
-                        right_id, keys_num) == 1) {
+    if (compareInternal(left_array_id, left_id, right_array_id, right_id, keys_num) ==
+        1) {
       return true;
     }
     return false;
@@ -1624,12 +1610,12 @@ class SortMultiplekeyKernel  : public SortArraysToIndicesKernel::Impl {
   auto Sort(ArrayItemIndexS* indices_begin, ArrayItemIndexS* indices_end) {
     int keys_num = sort_directions_.size();
     auto comp = [this, &keys_num](const ArrayItemIndexS& x, const ArrayItemIndexS& y) {
-        return compareRow(x.array_id, x.id, y.array_id, y.id, keys_num);};
+      return compareRow(x.array_id, x.id, y.array_id, y.id, keys_num);
+    };
     gfx::timsort(indices_begin, indices_begin + items_total_, comp);
   }
 
-  void Partition(ArrayItemIndexS* indices_begin, 
-                 ArrayItemIndexS* indices_end) {
+  void Partition(ArrayItemIndexS* indices_begin, ArrayItemIndexS* indices_end) {
     int64_t indices_i = 0;
     int64_t indices_null = 0;
     for (int array_id = 0; array_id < num_batches_; array_id++) {
@@ -1647,8 +1633,8 @@ class SortMultiplekeyKernel  : public SortArraysToIndicesKernel::Impl {
     int64_t buf_size = items_total_ * sizeof(ArrayItemIndexS);
     auto maybe_buffer = arrow::AllocateBuffer(buf_size, ctx_->memory_pool());
     indices_buf = *std::move(maybe_buffer);
-    ArrayItemIndexS* indices_begin = 
-      reinterpret_cast<ArrayItemIndexS*>(indices_buf->mutable_data());
+    ArrayItemIndexS* indices_begin =
+        reinterpret_cast<ArrayItemIndexS*>(indices_buf->mutable_data());
     ArrayItemIndexS* indices_end = indices_begin + items_total_;
     // do partition and sort here
     Partition(indices_begin, indices_end);
@@ -1657,13 +1643,11 @@ class SortMultiplekeyKernel  : public SortArraysToIndicesKernel::Impl {
       for (int i = 0; i < projected_field_list_.size(); i++) {
         projected_key_idx_list.push_back(i);
       }
-      MakeCmpFunction(
-          projected_, projected_field_list_, projected_key_idx_list, sort_directions_, 
-          nulls_order_, NaN_check_, cmp_functions_);
+      MakeCmpFunction(projected_, projected_field_list_, projected_key_idx_list,
+                      sort_directions_, nulls_order_, NaN_check_, cmp_functions_);
     } else {
-      MakeCmpFunction(
-          cached_, key_field_list_, key_index_list_, sort_directions_, 
-          nulls_order_, NaN_check_, cmp_functions_);
+      MakeCmpFunction(cached_, key_field_list_, key_index_list_, sort_directions_,
+                      nulls_order_, NaN_check_, cmp_functions_);
     }
     Sort(indices_begin, indices_end);
     std::shared_ptr<arrow::FixedSizeBinaryType> out_type;
@@ -1702,18 +1686,13 @@ class SortMultiplekeyKernel  : public SortArraysToIndicesKernel::Impl {
 };
 
 arrow::Status SortArraysToIndicesKernel::Make(
-    arrow::compute::ExecContext* ctx, 
-    std::shared_ptr<arrow::Schema> result_schema,
+    arrow::compute::ExecContext* ctx, std::shared_ptr<arrow::Schema> result_schema,
     gandiva::NodeVector sort_key_node,
     std::vector<std::shared_ptr<arrow::Field>> key_field_list,
-    std::vector<bool> sort_directions, 
-    std::vector<bool> nulls_order, 
-    bool NaN_check,
-    bool do_codegen,
-    int result_type, 
-    std::shared_ptr<KernalBase>* out) {
+    std::vector<bool> sort_directions, std::vector<bool> nulls_order, bool NaN_check,
+    bool do_codegen, int result_type, std::shared_ptr<KernalBase>* out) {
   *out = std::make_shared<SortArraysToIndicesKernel>(
-      ctx, result_schema, sort_key_node, key_field_list, sort_directions, nulls_order, 
+      ctx, result_schema, sort_key_node, key_field_list, sort_directions, nulls_order,
       NaN_check, do_codegen, result_type);
   return arrow::Status::OK();
 }
@@ -1732,15 +1711,11 @@ arrow::Status SortArraysToIndicesKernel::Make(
   PROCESS(arrow::Date32Type)             \
   PROCESS(arrow::Date64Type)
 SortArraysToIndicesKernel::SortArraysToIndicesKernel(
-    arrow::compute::ExecContext* ctx, 
-    std::shared_ptr<arrow::Schema> result_schema,
+    arrow::compute::ExecContext* ctx, std::shared_ptr<arrow::Schema> result_schema,
     gandiva::NodeVector sort_key_node,
     std::vector<std::shared_ptr<arrow::Field>> key_field_list,
-    std::vector<bool> sort_directions, 
-    std::vector<bool> nulls_order, 
-    bool NaN_check,
-    bool do_codegen,
-    int result_type) {
+    std::vector<bool> sort_directions, std::vector<bool> nulls_order, bool NaN_check,
+    bool do_codegen, int result_type) {
   // represents whether need to projection for sort keys
   bool pre_processed_key_ = false;
   gandiva::NodePtr key_project;
@@ -1781,14 +1756,14 @@ SortArraysToIndicesKernel::SortArraysToIndicesKernel(
           ctx, result_schema, key_projector, sort_directions, nulls_order, NaN_check));
     } else {
       switch (key_field_list[0]->type()->id()) {
-  #define PROCESS(InType)                                                               \
-    case InType::type_id: {                                                             \
-      using CType = typename arrow::TypeTraits<InType>::CType;                          \
-      impl_.reset(new SortInplaceKernel<InType, CType>(                                 \
-          ctx, result_schema, key_projector, sort_directions, nulls_order, NaN_check)); \
-    } break;
+#define PROCESS(InType)                                                               \
+  case InType::type_id: {                                                             \
+    using CType = typename arrow::TypeTraits<InType>::CType;                          \
+    impl_.reset(new SortInplaceKernel<InType, CType>(                                 \
+        ctx, result_schema, key_projector, sort_directions, nulls_order, NaN_check)); \
+  } break;
         PROCESS_SUPPORTED_TYPES(PROCESS)
-  #undef PROCESS
+#undef PROCESS
         default: {
           std::cout << "SortInplaceKernel type not supported, type is "
                     << key_field_list[0]->type() << std::endl;
@@ -1797,7 +1772,8 @@ SortArraysToIndicesKernel::SortArraysToIndicesKernel(
     }
   } else if (key_field_list.size() == 1 && result_schema->num_fields() >= 1) {
     // Will use SortOnekey when:
-    // 1. sorting for one col with payload 2. sorting for one string col or one bool col
+    // 1. sorting for one col with payload
+    // 2. sorting for one string col or one bool col
 #ifdef DEBUG
     std::cout << "UseSortOneKey" << std::endl;
 #endif
@@ -1813,12 +1789,12 @@ SortArraysToIndicesKernel::SortArraysToIndicesKernel(
             nulls_order, NaN_check));
       } else {
         switch (projected_types[0]->id()) {
-#define PROCESS(InType)                                                                \
-  case InType::type_id: {                                                              \
-    using CType = typename arrow::TypeTraits<InType>::CType;                           \
-    impl_.reset(new SortOnekeyKernel<InType>(ctx, result_schema, key_projector,        \
-                                                    key_field_list, sort_directions,   \
-                                                    nulls_order, NaN_check));          \
+#define PROCESS(InType)                                                         \
+  case InType::type_id: {                                                       \
+    using CType = typename arrow::TypeTraits<InType>::CType;                    \
+    impl_.reset(new SortOnekeyKernel<InType>(ctx, result_schema, key_projector, \
+                                             key_field_list, sort_directions,   \
+                                             nulls_order, NaN_check));          \
   } break;
           PROCESS_SUPPORTED_TYPES(PROCESS)
 #undef PROCESS
@@ -1840,12 +1816,12 @@ SortArraysToIndicesKernel::SortArraysToIndicesKernel(
             nulls_order, NaN_check));
       } else {
         switch (key_field_list[0]->type()->id()) {
-#define PROCESS(InType)                                                                \
-  case InType::type_id: {                                                              \
-    using CType = typename arrow::TypeTraits<InType>::CType;                           \
-    impl_.reset(new SortOnekeyKernel<InType>(ctx, result_schema, key_projector,        \
-                                             key_field_list, sort_directions,          \
-                                             nulls_order, NaN_check));                 \
+#define PROCESS(InType)                                                         \
+  case InType::type_id: {                                                       \
+    using CType = typename arrow::TypeTraits<InType>::CType;                    \
+    impl_.reset(new SortOnekeyKernel<InType>(ctx, result_schema, key_projector, \
+                                             key_field_list, sort_directions,   \
+                                             nulls_order, NaN_check));          \
   } break;
           PROCESS_SUPPORTED_TYPES(PROCESS)
 #undef PROCESS
@@ -1859,12 +1835,13 @@ SortArraysToIndicesKernel::SortArraysToIndicesKernel(
   } else {
     if (do_codegen) {
       // Will use Sort with Codegen for multiple-key sort
-      impl_.reset(new Impl(ctx, result_schema, key_projector, projected_types, 
+      impl_.reset(new Impl(ctx, result_schema, key_projector, projected_types,
                            key_field_list, sort_directions, nulls_order, NaN_check));
     } else {
       // Will use Sort without Codegen for multiple-key sort
-      impl_.reset(new SortMultiplekeyKernel(ctx, result_schema, key_projector, 
-          projected_types, key_field_list, sort_directions, nulls_order, NaN_check));
+      impl_.reset(new SortMultiplekeyKernel(ctx, result_schema, key_projector,
+                                            projected_types, key_field_list,
+                                            sort_directions, nulls_order, NaN_check));
     }
   }
   kernel_name_ = "SortArraysToIndicesKernel";
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc
index 7175cf164..e036405b5 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc
@@ -216,8 +216,9 @@ class WholeStageCodeGenKernel::Impl {
   }
 
   /* *
-   * Expecting insert node is a function node whose function name is "child", and real
-   * function is its first child, if who has two children, second one is the next child.
+   * Expects the insert node to be a function node whose function name is
+   * "child". The real function is its first child; if the node has two
+   * children, the second one is the next child.
    * */
   arrow::Status ParseNodeTree(std::shared_ptr<gandiva::Node> root_node,
                               int* hash_relation_index,
@@ -492,9 +493,9 @@ class TypedWholeStageCodeGenImpl : public CodeGenBase {
       codes_ss << "return arrow::Status::OK();" << std::endl;
       codes_ss << "} // End of ProcessAndCacheOne" << std::endl << std::endl;
       codes_ss << "bool HasNext() override { return !should_stop_; }" << std::endl;
-      codes_ss
-          << "arrow::Status Next(std::shared_ptr<arrow::RecordBatch>* out) override {"
-          << std::endl;
+      codes_ss << "arrow::Status Next(std::shared_ptr<arrow::RecordBatch>* "
+                  "out) override {"
+               << std::endl;
       codes_ss << "uint64_t out_length = 0;" << std::endl;
       codes_ss << "int gp_idx = 0;" << std::endl;
     } else if (is_aggr_ && is_smj_) {
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc
index d8ec9db3c..bbd02e4ff 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_kernel.cc
@@ -26,60 +26,59 @@ namespace extra {
 
 class WindowAggregateFunctionKernel::ActionFactory {
  public:
-  ActionFactory(std::shared_ptr<ActionBase> action) {
-    action_ = action;
-  }
+  ActionFactory(std::shared_ptr<ActionBase> action) { action_ = action; }
 
-  static arrow::Status Make(std::string action_name,
-                            arrow::compute::ExecContext *ctx,
+  static arrow::Status Make(std::string action_name, arrow::compute::ExecContext* ctx,
                             std::shared_ptr<arrow::DataType> type,
                             std::shared_ptr<arrow::DataType> return_type,
-                            std::shared_ptr<ActionFactory> *out) {
+                            std::shared_ptr<ActionFactory>* out) {
     std::shared_ptr<ActionBase> action;
     if (action_name == "sum") {
       RETURN_NOT_OK(MakeSumAction(ctx, type, {return_type}, &action));
     } else if (action_name == "avg") {
       RETURN_NOT_OK(MakeAvgAction(ctx, type, {return_type}, &action));
     } else {
-      return arrow::Status::Invalid("window aggregate function: unsupported action name: " + action_name);
+      return arrow::Status::Invalid(
+          "window aggregate function: unsupported action name: " + action_name);
     }
     *out = std::make_shared<ActionFactory>(action);
     return arrow::Status::OK();
   }
 
-  std::shared_ptr<ActionBase> Get() {
-    return action_;
-  }
+  std::shared_ptr<ActionBase> Get() { return action_; }
 
  private:
   std::shared_ptr<ActionBase> action_;
 };
 
-arrow::Status WindowAggregateFunctionKernel::Make(arrow::compute::ExecContext *ctx,
-                                                  std::string function_name,
-                                                  std::vector<std::shared_ptr<arrow::DataType>> type_list,
-                                                  std::shared_ptr<arrow::DataType> result_type,
-                                                  std::shared_ptr<KernalBase> *out) {
+arrow::Status WindowAggregateFunctionKernel::Make(
+    arrow::compute::ExecContext* ctx, std::string function_name,
+    std::vector<std::shared_ptr<arrow::DataType>> type_list,
+    std::shared_ptr<arrow::DataType> result_type, std::shared_ptr<KernalBase>* out) {
   if (type_list.size() != 1) {
-    return arrow::Status::Invalid("given more than 1 input argument for window function: " + function_name);
+    return arrow::Status::Invalid(
+        "given more than 1 input argument for window function: " + function_name);
   }
   std::shared_ptr<ActionFactory> action;
 
   if (function_name == "sum" || function_name == "avg") {
-    RETURN_NOT_OK(ActionFactory::Make(function_name, ctx, type_list[0], result_type, &action));
+    RETURN_NOT_OK(
+        ActionFactory::Make(function_name, ctx, type_list[0], result_type, &action));
   } else {
     return arrow::Status::Invalid("window function not supported: " + function_name);
   }
-  auto accumulated_group_ids = std::vector<std::shared_ptr<arrow::Int32Array >>();
-  *out = std::make_shared<WindowAggregateFunctionKernel>(ctx, type_list, result_type, accumulated_group_ids, action);
+  auto accumulated_group_ids = std::vector<std::shared_ptr<arrow::Int32Array>>();
+  *out = std::make_shared<WindowAggregateFunctionKernel>(ctx, type_list, result_type,
+                                                         accumulated_group_ids, action);
   return arrow::Status::OK();
 }
 
-WindowAggregateFunctionKernel::WindowAggregateFunctionKernel(arrow::compute::ExecContext *ctx,
-                                                             std::vector<std::shared_ptr<arrow::DataType>> type_list,
-                                                             std::shared_ptr<arrow::DataType> result_type,
-                                                             std::vector<std::shared_ptr<arrow::Int32Array>> accumulated_group_ids,
-                                                             std::shared_ptr<ActionFactory> action) {
+WindowAggregateFunctionKernel::WindowAggregateFunctionKernel(
+    arrow::compute::ExecContext* ctx,
+    std::vector<std::shared_ptr<arrow::DataType>> type_list,
+    std::shared_ptr<arrow::DataType> result_type,
+    std::vector<std::shared_ptr<arrow::Int32Array>> accumulated_group_ids,
+    std::shared_ptr<ActionFactory> action) {
   ctx_ = ctx;
   type_list_ = type_list;
   result_type_ = result_type;
@@ -94,7 +93,7 @@ WindowAggregateFunctionKernel::WindowAggregateFunctionKernel(arrow::compute::Exe
  * | 3 |     1 |   |     1 |   3 |          |      3 |
  * | 6 |     0 |                            |      8 |
  */
-arrow::Status WindowAggregateFunctionKernel::Evaluate(const ArrayList &in) {
+arrow::Status WindowAggregateFunctionKernel::Evaluate(const ArrayList& in) {
   // abstract following code to do common inter-window processing
 
   int32_t max_group_id = 0;
@@ -114,7 +113,8 @@ arrow::Status WindowAggregateFunctionKernel::Evaluate(const ArrayList &in) {
   action_input_data.push_back(in[0]);
   std::function<arrow::Status(int)> func;
   std::function<arrow::Status()> null_func;
-  RETURN_NOT_OK(action_->Get()->Submit(action_input_data, max_group_id, &func, &null_func));
+  RETURN_NOT_OK(
+      action_->Get()->Submit(action_input_data, max_group_id, &func, &null_func));
 
   for (int row_id = 0; row_id < group_id_array->length(); row_id++) {
     if (group_ids->IsNull(row_id)) {
@@ -128,48 +128,52 @@ arrow::Status WindowAggregateFunctionKernel::Evaluate(const ArrayList &in) {
   return arrow::Status::OK();
 }
 
-#define PROCESS_SUPPORTED_TYPES_WINDOW(PROC) \
-  PROC(arrow::UInt8Type, arrow::UInt8Builder, arrow::UInt8Array)              \
-  PROC(arrow::Int8Type, arrow::Int8Builder, arrow::Int8Array)                 \
-  PROC(arrow::UInt16Type, arrow::UInt16Builder, arrow::UInt16Array)           \
-  PROC(arrow::Int16Type, arrow::Int16Builder, arrow::Int16Array)              \
-  PROC(arrow::UInt32Type, arrow::UInt32Builder, arrow::UInt32Array)           \
-  PROC(arrow::Int32Type, arrow::Int32Builder, arrow::Int32Array)              \
-  PROC(arrow::UInt64Type, arrow::UInt64Builder, arrow::UInt64Array)           \
-  PROC(arrow::Int64Type, arrow::Int64Builder, arrow::Int64Array)              \
-  PROC(arrow::FloatType, arrow::FloatBuilder, arrow::FloatArray)              \
-  PROC(arrow::DoubleType, arrow::DoubleBuilder, arrow::DoubleArray)           \
+#define PROCESS_SUPPORTED_TYPES_WINDOW(PROC)                        \
+  PROC(arrow::UInt8Type, arrow::UInt8Builder, arrow::UInt8Array)    \
+  PROC(arrow::Int8Type, arrow::Int8Builder, arrow::Int8Array)       \
+  PROC(arrow::UInt16Type, arrow::UInt16Builder, arrow::UInt16Array) \
+  PROC(arrow::Int16Type, arrow::Int16Builder, arrow::Int16Array)    \
+  PROC(arrow::UInt32Type, arrow::UInt32Builder, arrow::UInt32Array) \
+  PROC(arrow::Int32Type, arrow::Int32Builder, arrow::Int32Array)    \
+  PROC(arrow::UInt64Type, arrow::UInt64Builder, arrow::UInt64Array) \
+  PROC(arrow::Int64Type, arrow::Int64Builder, arrow::Int64Array)    \
+  PROC(arrow::FloatType, arrow::FloatBuilder, arrow::FloatArray)    \
+  PROC(arrow::DoubleType, arrow::DoubleBuilder, arrow::DoubleArray) \
   PROC(arrow::Decimal128Type, arrow::Decimal128Builder, arrow::Decimal128Array)
 
-arrow::Status WindowAggregateFunctionKernel::Finish(ArrayList *out) {
+arrow::Status WindowAggregateFunctionKernel::Finish(ArrayList* out) {
   std::shared_ptr<arrow::DataType> value_type = result_type_;
   switch (value_type->id()) {
-
-#define PROCESS(VALUE_TYPE, BUILDER_TYPE, ARRAY_TYPE)                                     \
-  case VALUE_TYPE::type_id: {                                                             \
-    RETURN_NOT_OK((Finish0<VALUE_TYPE, BUILDER_TYPE, ARRAY_TYPE>(out, value_type)));      \
+#define PROCESS(VALUE_TYPE, BUILDER_TYPE, ARRAY_TYPE)                                \
+  case VALUE_TYPE::type_id: {                                                        \
+    RETURN_NOT_OK((Finish0<VALUE_TYPE, BUILDER_TYPE, ARRAY_TYPE>(out, value_type))); \
   } break;
 
     PROCESS_SUPPORTED_TYPES_WINDOW(PROCESS)
 #undef PROCESS
-    default: return arrow::Status::Invalid("window function: unsupported input type: " + value_type->name());
+    default:
+      return arrow::Status::Invalid("window function: unsupported input type: " +
+                                    value_type->name());
   }
   return arrow::Status::OK();
 }
 
-template<typename ValueType, typename BuilderType, typename ArrayType>
-arrow::Status WindowAggregateFunctionKernel::Finish0(ArrayList *out, std::shared_ptr<arrow::DataType> data_type) {
+template <typename ValueType, typename BuilderType, typename ArrayType>
+arrow::Status WindowAggregateFunctionKernel::Finish0(
+    ArrayList* out, std::shared_ptr<arrow::DataType> data_type) {
   ArrayList action_output;
   RETURN_NOT_OK(action_->Get()->Finish(&action_output));
   if (action_output.size() != 1) {
-    return arrow::Status::Invalid("window function: got invalid result from corresponding action");
+    return arrow::Status::Invalid(
+        "window function: got invalid result from corresponding action");
   }
 
   auto action_output_values = std::dynamic_pointer_cast<ArrayType>(action_output.at(0));
 
-  for (const auto &accumulated_group_ids_single_part : accumulated_group_ids_) {
+  for (const auto& accumulated_group_ids_single_part : accumulated_group_ids_) {
     std::shared_ptr<BuilderType> output_builder;
-    ARROW_ASSIGN_OR_RAISE(output_builder, (createBuilder<ValueType, BuilderType>(data_type)))
+    ARROW_ASSIGN_OR_RAISE(output_builder,
+                          (createBuilder<ValueType, BuilderType>(data_type)))
 
     for (int i = 0; i < accumulated_group_ids_single_part->length(); i++) {
       if (accumulated_group_ids_single_part->IsNull(i)) {
@@ -186,38 +190,40 @@ arrow::Status WindowAggregateFunctionKernel::Finish0(ArrayList *out, std::shared
   return arrow::Status::OK();
 }
 
-template<typename ValueType, typename BuilderType>
-typename arrow::enable_if_decimal128<ValueType, arrow::Result<std::shared_ptr<BuilderType>>>
-    WindowAggregateFunctionKernel::createBuilder(std::shared_ptr<arrow::DataType> data_type) {
+template <typename ValueType, typename BuilderType>
+typename arrow::enable_if_decimal128<ValueType,
+                                     arrow::Result<std::shared_ptr<BuilderType>>>
+WindowAggregateFunctionKernel::createBuilder(std::shared_ptr<arrow::DataType> data_type) {
   return std::make_shared<BuilderType>(data_type, ctx_->memory_pool());
 }
 
-template<typename ValueType, typename BuilderType>
+template <typename ValueType, typename BuilderType>
 typename arrow::enable_if_number<ValueType, arrow::Result<std::shared_ptr<BuilderType>>>
-    WindowAggregateFunctionKernel::createBuilder(std::shared_ptr<arrow::DataType> data_type) {
+WindowAggregateFunctionKernel::createBuilder(std::shared_ptr<arrow::DataType> data_type) {
   return std::make_shared<BuilderType>(ctx_->memory_pool());
 }
 
-WindowRankKernel::WindowRankKernel(arrow::compute::ExecContext *ctx,
-                                   std::vector<std::shared_ptr<arrow::DataType>> type_list,
-                                   std::shared_ptr<WindowSortKernel::Impl> sorter,
-                                   bool desc) {
+WindowRankKernel::WindowRankKernel(
+    arrow::compute::ExecContext* ctx,
+    std::vector<std::shared_ptr<arrow::DataType>> type_list,
+    std::shared_ptr<WindowSortKernel::Impl> sorter, bool desc) {
   ctx_ = ctx;
   type_list_ = type_list;
   sorter_ = sorter;
   desc_ = desc;
 }
 
-arrow::Status WindowRankKernel::Make(arrow::compute::ExecContext *ctx,
-                                     std::string function_name,
-                                     std::vector<std::shared_ptr<arrow::DataType>> type_list,
-                                     std::shared_ptr<KernalBase> *out,
-                                     bool desc) {
+arrow::Status WindowRankKernel::Make(
+    arrow::compute::ExecContext* ctx, std::string function_name,
+    std::vector<std::shared_ptr<arrow::DataType>> type_list,
+    std::shared_ptr<KernalBase>* out, bool desc) {
   std::vector<std::shared_ptr<arrow::Field>> key_fields;
   for (int i = 0; i < type_list.size(); i++) {
-    key_fields.push_back(std::make_shared<arrow::Field>("sort_key" + std::to_string(i), type_list.at(i)));
+    key_fields.push_back(
+        std::make_shared<arrow::Field>("sort_key" + std::to_string(i), type_list.at(i)));
   }
-  std::shared_ptr<arrow::Schema> result_schema = std::make_shared<arrow::Schema>(key_fields);
+  std::shared_ptr<arrow::Schema> result_schema =
+      std::make_shared<arrow::Schema>(key_fields);
 
   std::shared_ptr<WindowSortKernel::Impl> sorter;
   // fixme null ordering flag and collation flag
@@ -226,27 +232,27 @@ arrow::Status WindowRankKernel::Make(arrow::compute::ExecContext *ctx,
   if (key_fields.size() == 1) {
     std::shared_ptr<arrow::Field> key_field = key_fields[0];
     if (key_field->type()->id() == arrow::Type::STRING) {
-      sorter.reset(
-          new WindowSortOnekeyKernel<arrow::StringType, std::string>(ctx, key_fields,
-                                                                     result_schema, nulls_first, asc));
+      sorter.reset(new WindowSortOnekeyKernel<arrow::StringType, std::string>(
+          ctx, key_fields, result_schema, nulls_first, asc));
     } else {
       switch (key_field->type()->id()) {
-#define PROCESS(InType, BUILDER_TYPE, ARRAY_TYPE)                                                      \
-  case InType::type_id: {                                                     \
-    using CType = typename TypeTraits<InType>::CType;                  \
-    sorter.reset(new WindowSortOnekeyKernel<InType, CType>(ctx, key_fields, result_schema, nulls_first, asc));  \
+#define PROCESS(InType, BUILDER_TYPE, ARRAY_TYPE)           \
+  case InType::type_id: {                                   \
+    using CType = typename TypeTraits<InType>::CType;       \
+    sorter.reset(new WindowSortOnekeyKernel<InType, CType>( \
+        ctx, key_fields, result_schema, nulls_first, asc)); \
   } break;
         PROCESS_SUPPORTED_TYPES_WINDOW(PROCESS)
 #undef PROCESS
         default: {
           std::cout << "WindowRankKernel type not supported, type is "
                     << key_field->type() << std::endl;
-        }
-          break;
+        } break;
       }
     }
   } else {
-    sorter.reset(new WindowSortKernel::Impl(ctx, key_fields, result_schema, nulls_first, asc));
+    sorter.reset(
+        new WindowSortKernel::Impl(ctx, key_fields, result_schema, nulls_first, asc));
     auto status = sorter->LoadJITFunction(key_fields, result_schema);
     if (!status.ok()) {
       std::cout << "LoadJITFunction failed, msg is " << status.message() << std::endl;
@@ -257,12 +263,12 @@ arrow::Status WindowRankKernel::Make(arrow::compute::ExecContext *ctx,
   return arrow::Status::OK();
 }
 
-arrow::Status WindowRankKernel::Evaluate(const ArrayList &in) {
+arrow::Status WindowRankKernel::Evaluate(const ArrayList& in) {
   input_cache_.push_back(in);
   return arrow::Status::OK();
 }
 
-arrow::Status WindowRankKernel::Finish(ArrayList *out) {
+arrow::Status WindowRankKernel::Finish(ArrayList* out) {
   std::vector<ArrayList> values;
   std::vector<std::shared_ptr<arrow::Int32Array>> group_ids;
 
@@ -270,7 +276,8 @@ arrow::Status WindowRankKernel::Finish(ArrayList *out) {
   std::cout << "[window kernel] Entering Rank Kernel's finish method... " << std::endl;
 #endif
 #ifdef DEBUG
-  std::cout << "[window kernel] Splitting all input batches to key/value batches... " << std::endl;
+  std::cout << "[window kernel] Splitting all input batches to key/value batches... "
+            << std::endl;
 #endif
   for (auto batch : input_cache_) {
     ArrayList values_batch;
@@ -315,7 +322,8 @@ arrow::Status WindowRankKernel::Finish(ArrayList *out) {
   }
 
 #ifdef DEBUG
-  std::cout << "[window kernel] Creating indexed array based on group IDs... " << std::endl;
+  std::cout << "[window kernel] Creating indexed array based on group IDs... "
+            << std::endl;
 #endif
   for (int i = 0; i < group_ids.size(); i++) {
     auto slice = group_ids.at(i);
@@ -324,7 +332,8 @@ arrow::Status WindowRankKernel::Finish(ArrayList *out) {
         continue;
       }
       uint64_t partition_id = slice->GetView(j);
-      partitions_to_sort.at(partition_id).push_back(std::make_shared<ArrayItemIndex>(i, j));
+      partitions_to_sort.at(partition_id)
+          .push_back(std::make_shared<ArrayItemIndex>(i, j));
     }
   }
 #ifdef DEBUG
@@ -345,21 +354,23 @@ arrow::Status WindowRankKernel::Finish(ArrayList *out) {
 #endif
     sorted_partitions.push_back(std::move(sorted_partition));
   }
-  int32_t **rank_array = new int32_t*[group_ids.size()];
+  int32_t** rank_array = new int32_t*[group_ids.size()];
   for (int i = 0; i < group_ids.size(); i++) {
     *(rank_array + i) = new int32_t[group_ids.at(i)->length()];
   }
   for (int i = 0; i <= max_group_id; i++) {
 #ifdef DEBUG
-    std::cout << "[window kernel] Generating rank result on a single partition... " << std::endl;
+    std::cout << "[window kernel] Generating rank result on a single partition... "
+              << std::endl;
 #endif
-    std::vector<std::shared_ptr<ArrayItemIndex>> sorted_partition = sorted_partitions.at(i);
+    std::vector<std::shared_ptr<ArrayItemIndex>> sorted_partition =
+        sorted_partitions.at(i);
     int assumed_rank = 0;
     for (int j = 0; j < sorted_partition.size(); j++) {
-      ++assumed_rank; // rank value starts from 1
+      ++assumed_rank;  // rank value starts from 1
       std::shared_ptr<ArrayItemIndex> index = sorted_partition.at(j);
       if (j == 0) {
-        rank_array[index->array_id][index->id] = 1; // rank value starts from 1
+        rank_array[index->array_id][index->id] = 1;  // rank value starts from 1
         continue;
       }
       std::shared_ptr<ArrayItemIndex> last_index = sorted_partition.at(j - 1);
@@ -368,17 +379,17 @@ arrow::Status WindowRankKernel::Finish(ArrayList *out) {
         bool s;
         std::shared_ptr<arrow::DataType> type = type_list_.at(column_id);
         switch (type->id()) {
-#define PROCESS(InType, BUILDER_TYPE, ARRAY_TYPE)                                                       \
-  case InType::type_id: {                                                     \
-      RETURN_NOT_OK(AreTheSameValue<ARRAY_TYPE>(values, column_id, index, last_index, &s));  \
+#define PROCESS(InType, BUILDER_TYPE, ARRAY_TYPE)                               \
+  case InType::type_id: {                                                       \
+    RETURN_NOT_OK(                                                              \
+        AreTheSameValue<ARRAY_TYPE>(values, column_id, index, last_index, &s)); \
   } break;
           PROCESS_SUPPORTED_TYPES_WINDOW(PROCESS)
 #undef PROCESS
           default: {
-            std::cout << "WindowRankKernel: type not supported: "
-                      << type->ToString() << std::endl; // todo use arrow::Status
-          }
-            break;
+            std::cout << "WindowRankKernel: type not supported: " << type->ToString()
+                      << std::endl;  // todo use arrow::Status
+          } break;
         }
         if (!s) {
           same = false;
@@ -386,7 +397,8 @@ arrow::Status WindowRankKernel::Finish(ArrayList *out) {
         }
       }
       if (same) {
-        rank_array[index->array_id][index->id] = rank_array[last_index->array_id][last_index->id];
+        rank_array[index->array_id][index->id] =
+            rank_array[last_index->array_id][last_index->id];
         continue;
       }
       rank_array[index->array_id][index->id] = assumed_rank;
@@ -397,13 +409,15 @@ arrow::Status WindowRankKernel::Finish(ArrayList *out) {
   }
 
 #ifdef DEBUG
-  std::cout << "[window kernel] Building overall associated rank results... " << std::endl;
+  std::cout << "[window kernel] Building overall associated rank results... "
+            << std::endl;
 #endif
   for (int i = 0; i < input_cache_.size(); i++) {
     auto batch = input_cache_.at(i);
     auto group_id_column_slice = batch.at(type_list_.size());
     int slice_length = group_id_column_slice->length();
-    std::shared_ptr<arrow::Int32Builder> rank_builder = std::make_shared<arrow::Int32Builder>(ctx_->memory_pool());
+    std::shared_ptr<arrow::Int32Builder> rank_builder =
+        std::make_shared<arrow::Int32Builder>(ctx_->memory_pool());
     for (int j = 0; j < slice_length; j++) {
       RETURN_NOT_OK(rank_builder->Append(rank_array[i][j]));
     }
@@ -415,25 +429,28 @@ arrow::Status WindowRankKernel::Finish(ArrayList *out) {
   std::cout << "[window kernel] Finished. " << std::endl;
 #endif
   for (int i = 0; i < group_ids.size(); i++) {
-    delete[] *(rank_array + i);
+    delete[] * (rank_array + i);
   }
   delete[] rank_array;
   return arrow::Status::OK();
 }
 
-static arrow::Status EncodeIndices( std::vector<std::shared_ptr<ArrayItemIndex>> in, std::shared_ptr<arrow::Array> *out){
+static arrow::Status EncodeIndices(std::vector<std::shared_ptr<ArrayItemIndex>> in,
+                                   std::shared_ptr<arrow::Array>* out) {
   arrow::UInt64Builder builder;
   for (const auto& each : in) {
-    uint64_t encoded = ((uint64_t) (each->array_id) << 16U) ^ ((uint64_t) (each->id));
+    uint64_t encoded = ((uint64_t)(each->array_id) << 16U) ^ ((uint64_t)(each->id));
     RETURN_NOT_OK(builder.Append(encoded));
   }
   RETURN_NOT_OK(builder.Finish(out));
   return arrow::Status::OK();
 }
 
-static arrow::Status DecodeIndices(std::shared_ptr<arrow::Array> in, std::vector<std::shared_ptr<ArrayItemIndex>> *out){
+static arrow::Status DecodeIndices(std::shared_ptr<arrow::Array> in,
+                                   std::vector<std::shared_ptr<ArrayItemIndex>>* out) {
   std::vector<std::shared_ptr<ArrayItemIndex>> v;
-  std::shared_ptr<arrow::UInt64Array> selected = std::dynamic_pointer_cast<arrow::UInt64Array>(in);
+  std::shared_ptr<arrow::UInt64Array> selected =
+      std::dynamic_pointer_cast<arrow::UInt64Array>(in);
   for (int i = 0; i < selected->length(); i++) {
     uint64_t encoded = selected->GetView(i);
     uint16_t array_id = (encoded & 0xFFFF0000U) >> 16U;
@@ -452,8 +469,9 @@ arrow::Status WindowRankKernel::SortToIndicesPrepare(std::vector<ArrayList> valu
   // todo sort algorithm
 }
 
-arrow::Status WindowRankKernel::SortToIndicesFinish(std::vector<std::shared_ptr<ArrayItemIndex>> elements_to_sort,
-                                                    std::vector<std::shared_ptr<ArrayItemIndex>> *offsets) {
+arrow::Status WindowRankKernel::SortToIndicesFinish(
+    std::vector<std::shared_ptr<ArrayItemIndex>> elements_to_sort,
+    std::vector<std::shared_ptr<ArrayItemIndex>>* offsets) {
   std::shared_ptr<arrow::Array> in;
   std::shared_ptr<arrow::Array> out;
   RETURN_NOT_OK(EncodeIndices(elements_to_sort, &in));
@@ -465,17 +483,23 @@ arrow::Status WindowRankKernel::SortToIndicesFinish(std::vector<std::shared_ptr<
   // todo sort algorithm
 }
 
-template<typename ArrayType>
-arrow::Status WindowRankKernel::AreTheSameValue(const std::vector<ArrayList>& values, int column, std::shared_ptr<ArrayItemIndex> i, std::shared_ptr<ArrayItemIndex> j, bool* out) {
-  auto typed_array_i = std::dynamic_pointer_cast<ArrayType>(values.at(i->array_id).at(column));
-  auto typed_array_j = std::dynamic_pointer_cast<ArrayType>(values.at(j->array_id).at(column));
+template <typename ArrayType>
+arrow::Status WindowRankKernel::AreTheSameValue(const std::vector<ArrayList>& values,
+                                                int column,
+                                                std::shared_ptr<ArrayItemIndex> i,
+                                                std::shared_ptr<ArrayItemIndex> j,
+                                                bool* out) {
+  auto typed_array_i =
+      std::dynamic_pointer_cast<ArrayType>(values.at(i->array_id).at(column));
+  auto typed_array_j =
+      std::dynamic_pointer_cast<ArrayType>(values.at(j->array_id).at(column));
   *out = (typed_array_i->GetView(i->id) == typed_array_j->GetView(j->id));
   return arrow::Status::OK();
 }
 
 #undef PROCESS_SUPPORTED_TYPES_WINDOW
 
-}
-}
-}
-}
\ No newline at end of file
+}  // namespace extra
+}  // namespace arrowcompute
+}  // namespace codegen
+}  // namespace sparkcolumnarplugin
\ No newline at end of file
diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h
index 087c15577..e1ecbed4e 100644
--- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h
+++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/window_sort_kernel.h
@@ -16,6 +16,7 @@
  */
 
 #include <arrow/array/concatenate.h>
+#include <arrow/buffer.h>
 #include <arrow/compute/api.h>
 #include <arrow/type.h>
 #include <arrow/type_fwd.h>
@@ -32,13 +33,11 @@
 #include "codegen/arrow_compute/ext/code_generator_base.h"
 #include "codegen/arrow_compute/ext/codegen_common.h"
 #include "codegen/arrow_compute/ext/kernels_ext.h"
-#include "third_party/ska_sort.hpp"
 #include "precompile/array.h"
 #include "precompile/builder.h"
 #include "precompile/type.h"
-#include <arrow/buffer.h>
-#include "codegen/arrow_compute/ext/code_generator_base.h"
-#include <algorithm>
+#include "third_party/ska_sort.hpp"
+#include "third_party/timsort.hpp"
 
 namespace sparkcolumnarplugin {
 namespace codegen {
@@ -47,73 +46,6 @@ namespace extra {
 using ArrayList = std::vector<std::shared_ptr<arrow::Array>>;
 using namespace sparkcolumnarplugin::precompile;
 
-class AppenderBase {
- public:
-  virtual ~AppenderBase() {}
-
-  virtual arrow::Status AddArray(const std::shared_ptr<arrow::Array>& arr)  {
-    return arrow::Status::NotImplemented("AppenderBase AddArray is abstract.");
-  }
-
-  virtual arrow::Status Append(uint16_t& array_id, uint16_t& item_id) {
-    return arrow::Status::NotImplemented("AppenderBase Append is abstract.");
-  }
-
-  virtual arrow::Status Finish(std::shared_ptr<arrow::Array>* out_) {
-    return arrow::Status::NotImplemented("AppenderBase Finish is abstract.");
-  }
-
-  virtual arrow::Status Reset() {
-    return arrow::Status::NotImplemented("AppenderBase Reset is abstract.");
-  }
-};
-
-template <class DataType>
-class ArrayAppender : public AppenderBase {
- public:
-  ArrayAppender(arrow::compute::ExecContext* ctx) : ctx_(ctx) {
-    std::unique_ptr<arrow::ArrayBuilder> array_builder;
-    arrow::MakeBuilder(
-        ctx_->memory_pool(), arrow::TypeTraits<DataType>::type_singleton(), &array_builder);
-    builder_.reset(
-        arrow::internal::checked_cast<BuilderType_*>(array_builder.release()));
-  }
-  ~ArrayAppender() {}
-
-  arrow::Status AddArray(const std::shared_ptr<arrow::Array>& arr)  {
-    auto typed_arr_ = std::dynamic_pointer_cast<ArrayType_>(arr);
-    cached_arr_.emplace_back(typed_arr_);
-    return arrow::Status::OK();
-  }
-
-  arrow::Status Append(uint16_t& array_id, uint16_t& item_id) {
-    if (!cached_arr_[array_id]->IsNull(item_id)) {
-      auto val = cached_arr_[array_id]->GetView(item_id);
-      builder_->Append(cached_arr_[array_id]->GetView(item_id));
-    } else {
-      builder_->AppendNull();
-    }
-    return arrow::Status::OK();
-  }
-
-  arrow::Status Finish(std::shared_ptr<arrow::Array>* out_) {
-    builder_->Finish(out_);
-    return arrow::Status::OK();
-  }
-
-  arrow::Status Reset() {
-    builder_->Reset();
-    return arrow::Status::OK();
-  }
-
- private:
-  using BuilderType_ = typename arrow::TypeTraits<DataType>::BuilderType;
-  using ArrayType_ = typename arrow::TypeTraits<DataType>::ArrayType;
-  std::unique_ptr<BuilderType_> builder_;
-  std::vector<std::shared_ptr<ArrayType_>> cached_arr_;
-  arrow::compute::ExecContext* ctx_;
-};
-
 ///////////////  SortArraysToIndices  ////////////////
 class WindowSortKernel::Impl {
  public:
@@ -125,15 +57,14 @@ class WindowSortKernel::Impl {
     for (auto field : key_field_list) {
       auto indices = result_schema->GetAllFieldIndices(field->name());
       if (indices.size() != 1) {
-        std::cout << "[ERROR] WindowSortKernel::Impl can't find key "
-                  << field->ToString() << " from " << result_schema->ToString()
-                  << std::endl;
+        std::cout << "[ERROR] WindowSortKernel::Impl can't find key " << field->ToString()
+                  << " from " << result_schema->ToString() << std::endl;
         throw;
       }
       key_index_list_.push_back(indices[0]);
     }
   }
-  virtual ~Impl(){}
+  virtual ~Impl() {}
   virtual arrow::Status LoadJITFunction(
       std::vector<std::shared_ptr<arrow::Field>> key_field_list,
       std::shared_ptr<arrow::Schema> result_schema) {
@@ -177,7 +108,8 @@ class WindowSortKernel::Impl {
     return arrow::Status::OK();
   }
 
-  virtual arrow::Status Finish(std::shared_ptr<arrow::Array> in, std::shared_ptr<arrow::Array>* out) {
+  virtual arrow::Status Finish(std::shared_ptr<arrow::Array> in,
+                               std::shared_ptr<arrow::Array>* out) {
     RETURN_NOT_OK(sorter->Finish(in, out));
     return arrow::Status::OK();
   }
@@ -285,6 +217,7 @@ class WindowSortKernel::Impl {
 #include "precompile/builder.h"
 #include "precompile/type.h"
 #include "third_party/ska_sort.hpp"
+#include "third_party/timsort.hpp"
 using namespace sparkcolumnarplugin::precompile;
 
 class TypedSorterImpl : public CodeGenBase {
@@ -294,27 +227,27 @@ class TypedSorterImpl : public CodeGenBase {
   arrow::Status Evaluate(const ArrayList& in) override {
     num_batches_++;
     )" + cached_insert_str +
-        R"(
+           R"(
     return arrow::Status::OK();
   }
 
   arrow::Status FinishInternal(std::shared_ptr<arrow::Array> in, std::shared_ptr<FixedSizeBinaryArray>* out) {
     )" + comp_func_str +
-        R"(
+           R"(
 
     std::shared_ptr<arrow::UInt64Array> selected = std::dynamic_pointer_cast<arrow::UInt64Array>(in);
     int items_total = selected->length();
 
     // initiate buffer for all arrays
     std::shared_ptr<arrow::Buffer> indices_buf;
-    int64_t buf_size = items_total * sizeof(ArrayItemIndex);
+    int64_t buf_size = items_total * sizeof(ArrayItemIndexS);
     auto maybe_buffer = arrow::AllocateBuffer(buf_size, ctx_->memory_pool());
     indices_buf = *std::move(maybe_buffer);
 
     // start to partition not_null with null
-    ArrayItemIndex* indices_begin =
-        reinterpret_cast<ArrayItemIndex*>(indices_buf->mutable_data());
-    ArrayItemIndex* indices_end = indices_begin + items_total;
+    ArrayItemIndexS* indices_begin =
+        reinterpret_cast<ArrayItemIndexS*>(indices_buf->mutable_data());
+    ArrayItemIndexS* indices_end = indices_begin + items_total;
 
     int64_t indices_i = 0;
 
@@ -330,9 +263,9 @@ class TypedSorterImpl : public CodeGenBase {
       indices_i++;
     }
     )" + sort_func_str +
-        R"(
+           R"(
     std::shared_ptr<arrow::FixedSizeBinaryType> out_type;
-    RETURN_NOT_OK(MakeFixedSizeBinaryType(sizeof(ArrayItemIndex) / sizeof(int32_t), &out_type));
+    RETURN_NOT_OK(MakeFixedSizeBinaryType(sizeof(ArrayItemIndexS) / sizeof(int32_t), &out_type));
     RETURN_NOT_OK(MakeFixedSizeBinaryArray(out_type, items_total, indices_buf, out));
     return arrow::Status::OK();
   }
@@ -341,7 +274,7 @@ class TypedSorterImpl : public CodeGenBase {
     std::shared_ptr<FixedSizeBinaryArray> indices_out;
     RETURN_NOT_OK(FinishInternal(in, &indices_out));
     arrow::UInt64Builder builder;
-    auto *index = (ArrayItemIndex *) indices_out->value_data();
+    auto *index = (ArrayItemIndexS *) indices_out->value_data();
     for (int i = 0; i < indices_out->length(); i++) {
       uint64_t encoded = ((uint64_t) (index->array_id) << 16U) ^ ((uint64_t) (index->id));
       RETURN_NOT_OK(builder.Append(encoded));
@@ -353,7 +286,7 @@ class TypedSorterImpl : public CodeGenBase {
 
  private:
   )" + cached_variables_define_str +
-        R"(
+           R"(
   arrow::compute::ExecContext* ctx_;
   uint64_t num_batches_ = 0;
 
@@ -362,10 +295,10 @@ class TypedSorterImpl : public CodeGenBase {
     SorterResultIterator(arrow::compute::ExecContext* ctx,
                        std::shared_ptr<FixedSizeBinaryArray> indices_in,
    )" + result_iter_param_define_str +
-        R"(): ctx_(ctx), total_length_(indices_in->length()), indices_in_cache_(indices_in) {
+           R"(): ctx_(ctx), total_length_(indices_in->length()), indices_in_cache_(indices_in) {
      )" + result_iter_define_str +
-        R"(
-      indices_begin_ = (ArrayItemIndex*)indices_in->value_data();
+           R"(
+      indices_begin_ = (ArrayItemIndexS*)indices_in->value_data();
     }
 
     std::string ToString() override { return "SortArraysToIndicesResultIterator"; }
@@ -379,28 +312,28 @@ class TypedSorterImpl : public CodeGenBase {
 
     arrow::Status Next(std::shared_ptr<arrow::RecordBatch>* out) {
       auto length = (total_length_ - offset_) > )" +
-        std::to_string(GetBatchSize()) + R"( ? )" + std::to_string(GetBatchSize()) +
-        R"( : (total_length_ - offset_);
+           std::to_string(GetBatchSize()) + R"( ? )" + std::to_string(GetBatchSize()) +
+           R"( : (total_length_ - offset_);
       uint64_t count = 0;
       while (count < length) {
         auto item = indices_begin_ + offset_ + count++;
       )" + typed_build_str +
-        R"(
+           R"(
       }
       offset_ += length;
       )" + typed_res_array_build_str +
-        R"(
+           R"(
       *out = arrow::RecordBatch::Make(result_schema_, length, {)" +
-        typed_res_array_str + R"(});
+           typed_res_array_str + R"(});
       return arrow::Status::OK();
     }
 
    private:
    )" + result_variables_define_str +
-        R"(
+           R"(
     std::shared_ptr<FixedSizeBinaryArray> indices_in_cache_;
     uint64_t offset_ = 0;
-    ArrayItemIndex* indices_begin_;
+    ArrayItemIndexS* indices_begin_;
     const uint64_t total_length_;
     std::shared_ptr<arrow::Schema> result_schema_;
     arrow::compute::ExecContext* ctx_;
@@ -424,7 +357,8 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
   }
   std::string GetCompFunction(std::vector<int> sort_key_index_list) {
     std::stringstream ss;
-    ss << "auto comp = [this](const ArrayItemIndex& x, const ArrayItemIndex& y) {"
+    ss << "auto comp = [this](const ArrayItemIndexS& x, const ArrayItemIndexS& "
+          "y) {"
        << GetCompFunction_(0, sort_key_index_list) << "};";
     return ss.str();
   }
@@ -433,22 +367,30 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
 
     auto cur_key_id = sort_key_index_list[cur_key_index];
     // todo nulls last / nulls first
-    auto x_value = "cached_" + std::to_string(cur_key_id) + "_[x.array_id]->GetView(x.id)";
-    auto y_value = "cached_" + std::to_string(cur_key_id) + "_[y.array_id]->GetView(y.id)";
+    auto x_value =
+        "cached_" + std::to_string(cur_key_id) + "_[x.array_id]->GetView(x.id)";
+    auto y_value =
+        "cached_" + std::to_string(cur_key_id) + "_[y.array_id]->GetView(y.id)";
 
-    auto is_x_null = "cached_" + std::to_string(cur_key_id) + "_[x.array_id]->IsNull(x.id)";
-    auto is_y_null = "cached_" + std::to_string(cur_key_id) + "_[y.array_id]->IsNull(y.id)";
+    auto is_x_null =
+        "cached_" + std::to_string(cur_key_id) + "_[x.array_id]->IsNull(x.id)";
+    auto is_y_null =
+        "cached_" + std::to_string(cur_key_id) + "_[y.array_id]->IsNull(y.id)";
 
     if (asc_) {
       std::stringstream ss;
-      ss << "return " << is_x_null << " && " << is_y_null << " ? " << "false" << " : "
-      << "(" << is_x_null << " ? " << !nulls_first_ << " : "
-      << "(" << is_y_null << " ? " << nulls_first_ << " : "
-      << "(" << x_value << " < " << y_value << ")));\n";
+      ss << "return " << is_x_null << " && " << is_y_null << " ? "
+         << "false"
+         << " : "
+         << "(" << is_x_null << " ? " << !nulls_first_ << " : "
+         << "(" << is_y_null << " ? " << nulls_first_ << " : "
+         << "(" << x_value << " < " << y_value << ")));\n";
       comp_str = ss.str();
     } else {
       std::stringstream ss;
-      ss << "return " << is_x_null << " && " << is_y_null << " ? " << "false" << " : "
+      ss << "return " << is_x_null << " && " << is_y_null << " ? "
+         << "false"
+         << " : "
          << "(" << is_x_null << " ? " << !nulls_first_ << " : "
          << "(" << is_y_null << " ? " << nulls_first_ << " : "
          << "(" << x_value << " > " << y_value << ")));\n";
@@ -459,14 +401,14 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx,
       return comp_str;
     }
     std::stringstream ss;
-    ss << "if (" << x_value << " == " << y_value << " || (" << is_x_null << " && " << is_y_null << ")) {"
-       << GetCompFunction_(cur_key_index + 1, sort_key_index_list) << "} else { "
-       << comp_str << "}";
+    ss << "if (" << x_value << " == " << y_value << " || (" << is_x_null << " && "
+       << is_y_null << ")) {" << GetCompFunction_(cur_key_index + 1, sort_key_index_list)
+       << "} else { " << comp_str << "}";
     return ss.str();
   }
 
   std::string GetSortFunction(std::vector<int>& key_index_list) {
-    return "std::sort(indices_begin, indices_begin + "
+    return "gfx::timsort(indices_begin, indices_begin + "
            "items_total, "
            "comp);";
   }
@@ -568,9 +510,9 @@ template <typename DATATYPE, typename CTYPE>
 class WindowSortOnekeyKernel : public WindowSortKernel::Impl {
  public:
   WindowSortOnekeyKernel(arrow::compute::ExecContext* ctx,
-                   std::vector<std::shared_ptr<arrow::Field>> key_field_list,
-                   std::shared_ptr<arrow::Schema> result_schema,
-                   bool nulls_first, bool asc)
+                         std::vector<std::shared_ptr<arrow::Field>> key_field_list,
+                         std::shared_ptr<arrow::Schema> result_schema, bool nulls_first,
+                         bool asc)
       : ctx_(ctx), nulls_first_(nulls_first), asc_(asc), result_schema_(result_schema) {
     auto indices = result_schema->GetAllFieldIndices(key_field_list[0]->name());
     key_id_ = indices[0];
@@ -579,7 +521,7 @@ class WindowSortOnekeyKernel : public WindowSortKernel::Impl {
 
   arrow::Status Evaluate(const ArrayList& in) override {
     num_batches_++;
-    cached_key_.push_back(std::dynamic_pointer_cast<ArrayType_key>(in[key_id_]));
+    cached_key_.push_back(std::make_shared<ArrayType_key>(in[key_id_]));
     length_list_.push_back(in[key_id_]->length());
     if (cached_.size() <= col_num_) {
       cached_.resize(col_num_ + 1);
@@ -591,10 +533,11 @@ class WindowSortOnekeyKernel : public WindowSortKernel::Impl {
   }
 
   arrow::Status FinishInternal(std::shared_ptr<arrow::Array> in,
-      std::shared_ptr<FixedSizeBinaryArray>* out) {
+                               std::shared_ptr<FixedSizeBinaryArray>* out) {
     int items_total = 0;
     int nulls_total = 0;
-    std::shared_ptr<arrow::UInt64Array> selected = std::dynamic_pointer_cast<arrow::UInt64Array>(in);
+    std::shared_ptr<arrow::UInt64Array> selected =
+        std::dynamic_pointer_cast<arrow::UInt64Array>(in);
     for (int i = 0; i < selected->length(); i++) {
       uint64_t encoded = selected->GetView(i);
       uint16_t array_id = (encoded & 0xFFFF0000U) >> 16U;
@@ -608,12 +551,12 @@ class WindowSortOnekeyKernel : public WindowSortKernel::Impl {
     }
     // initiate buffer for all arrays
     std::shared_ptr<arrow::Buffer> indices_buf;
-    int64_t buf_size = items_total * sizeof(ArrayItemIndex);
+    int64_t buf_size = items_total * sizeof(ArrayItemIndexS);
     auto maybe_buffer = arrow::AllocateBuffer(buf_size, ctx_->memory_pool());
     indices_buf = *std::move(maybe_buffer);
-    ArrayItemIndex* indices_begin =
-        reinterpret_cast<ArrayItemIndex*>(indices_buf->mutable_data());
-    ArrayItemIndex* indices_end = indices_begin + items_total;
+    ArrayItemIndexS* indices_begin =
+        reinterpret_cast<ArrayItemIndexS*>(indices_buf->mutable_data());
+    ArrayItemIndexS* indices_end = indices_begin + items_total;
     int64_t indices_i = 0;
     int64_t indices_null = 0;
     // we should support nulls first and nulls last here
@@ -646,36 +589,41 @@ class WindowSortOnekeyKernel : public WindowSortKernel::Impl {
       }
     }
     if (asc_) {
+      auto comp = [this](const ArrayItemIndexS& x, const ArrayItemIndexS& y) {
+        return cached_key_[x.array_id]->GetView(x.id) <
+               cached_key_[y.array_id]->GetView(y.id);
+      };
       if (nulls_first_) {
-        ska_sort(indices_begin + nulls_total, indices_begin + items_total,
-                 [this](auto& x) -> decltype(auto){ return cached_key_[x.array_id]->GetView(x.id); });
+        gfx::timsort(indices_begin + nulls_total, indices_begin + items_total, comp);
       } else {
-        ska_sort(indices_begin, indices_begin + items_total - nulls_total,
-                 [this](auto& x) -> decltype(auto){ return cached_key_[x.array_id]->GetView(x.id); });
+        gfx::timsort(indices_begin, indices_begin + items_total - nulls_total, comp);
       }
     } else {
-      auto comp = [this](const ArrayItemIndex& x, const ArrayItemIndex& y) {
-        return cached_key_[x.array_id]->GetView(x.id) > cached_key_[y.array_id]->GetView(y.id);
+      auto comp = [this](const ArrayItemIndexS& x, const ArrayItemIndexS& y) {
+        return cached_key_[x.array_id]->GetView(x.id) >
+               cached_key_[y.array_id]->GetView(y.id);
       };
       if (nulls_first_) {
-        std::sort(indices_begin + nulls_total, indices_begin + items_total, comp);
+        gfx::timsort(indices_begin + nulls_total, indices_begin + items_total, comp);
       } else {
-        std::sort(indices_begin, indices_begin + items_total - nulls_total, comp);
+        gfx::timsort(indices_begin, indices_begin + items_total - nulls_total, comp);
       }
     }
     std::shared_ptr<arrow::FixedSizeBinaryType> out_type;
-    RETURN_NOT_OK(MakeFixedSizeBinaryType(sizeof(ArrayItemIndex) / sizeof(int32_t), &out_type));
+    RETURN_NOT_OK(
+        MakeFixedSizeBinaryType(sizeof(ArrayItemIndexS) / sizeof(int32_t), &out_type));
     RETURN_NOT_OK(MakeFixedSizeBinaryArray(out_type, items_total, indices_buf, out));
     return arrow::Status::OK();
   }
 
-  arrow::Status Finish(std::shared_ptr<arrow::Array> in, std::shared_ptr<arrow::Array>* out) override {
+  arrow::Status Finish(std::shared_ptr<arrow::Array> in,
+                       std::shared_ptr<arrow::Array>* out) override {
     std::shared_ptr<FixedSizeBinaryArray> indices_out;
     RETURN_NOT_OK(FinishInternal(in, &indices_out));
     arrow::UInt64Builder builder;
-    auto *index = (ArrayItemIndex *) indices_out->value_data();
+    auto* index = (ArrayItemIndexS*)indices_out->value_data();
     for (int i = 0; i < indices_out->length(); i++) {
-      uint64_t encoded = ((uint64_t) (index->array_id) << 16U) ^ ((uint64_t) (index->id));
+      uint64_t encoded = ((uint64_t)(index->array_id) << 16U) ^ ((uint64_t)(index->id));
       RETURN_NOT_OK(builder.Append(encoded));
       index++;
     }
@@ -684,8 +632,8 @@ class WindowSortOnekeyKernel : public WindowSortKernel::Impl {
   }
 
  private:
-  using ArrayType_key = typename arrow::TypeTraits<DATATYPE>::ArrayType;
-  //using ArrayType_key = arrow::UInt32Array;
+  using ArrayType_key = typename TypeTraits<DATATYPE>::ArrayType;
+  // using ArrayType_key = arrow::UInt32Array;
   std::vector<std::shared_ptr<ArrayType_key>> cached_key_;
   std::vector<arrow::ArrayVector> cached_;
   arrow::compute::ExecContext* ctx_;
@@ -704,21 +652,21 @@ arrow::Status WindowSortKernel::Make(
     std::shared_ptr<arrow::Schema> result_schema, std::shared_ptr<KernalBase>* out,
     bool nulls_first, bool asc) {
   *out = std::make_shared<WindowSortKernel>(ctx, key_field_list, result_schema,
-                                                     nulls_first, asc);
+                                            nulls_first, asc);
   return arrow::Status::OK();
 }
 
-#define PROCESS_SUPPORTED_TYPES_WINDOW_SORT(PROC) \
-  PROC(arrow::UInt8Type, arrow::UInt8Builder, arrow::UInt8Array)              \
-  PROC(arrow::Int8Type, arrow::Int8Builder, arrow::Int8Array)                 \
-  PROC(arrow::UInt16Type, arrow::UInt16Builder, arrow::UInt16Array)           \
-  PROC(arrow::Int16Type, arrow::Int16Builder, arrow::Int16Array)              \
-  PROC(arrow::UInt32Type, arrow::UInt32Builder, arrow::UInt32Array)           \
-  PROC(arrow::Int32Type, arrow::Int32Builder, arrow::Int32Array)              \
-  PROC(arrow::UInt64Type, arrow::UInt64Builder, arrow::UInt64Array)           \
-  PROC(arrow::Int64Type, arrow::Int64Builder, arrow::Int64Array)              \
-  PROC(arrow::FloatType, arrow::FloatBuilder, arrow::FloatArray)              \
-  PROC(arrow::DoubleType, arrow::DoubleBuilder, arrow::DoubleArray)           \
+#define PROCESS_SUPPORTED_TYPES_WINDOW_SORT(PROC)                   \
+  PROC(arrow::UInt8Type, arrow::UInt8Builder, arrow::UInt8Array)    \
+  PROC(arrow::Int8Type, arrow::Int8Builder, arrow::Int8Array)       \
+  PROC(arrow::UInt16Type, arrow::UInt16Builder, arrow::UInt16Array) \
+  PROC(arrow::Int16Type, arrow::Int16Builder, arrow::Int16Array)    \
+  PROC(arrow::UInt32Type, arrow::UInt32Builder, arrow::UInt32Array) \
+  PROC(arrow::Int32Type, arrow::Int32Builder, arrow::Int32Array)    \
+  PROC(arrow::UInt64Type, arrow::UInt64Builder, arrow::UInt64Array) \
+  PROC(arrow::Int64Type, arrow::Int64Builder, arrow::Int64Array)    \
+  PROC(arrow::FloatType, arrow::FloatBuilder, arrow::FloatArray)    \
+  PROC(arrow::DoubleType, arrow::DoubleBuilder, arrow::DoubleArray) \
   PROC(arrow::Decimal128Type, arrow::Decimal128Builder, arrow::Decimal128Array)
 
 WindowSortKernel::WindowSortKernel(
@@ -730,15 +678,15 @@ WindowSortKernel::WindowSortKernel(
     std::cout << "UseSortOneKey" << std::endl;
 #endif
     if (key_field_list[0]->type()->id() == arrow::Type::STRING) {
-      impl_.reset(
-          new WindowSortOnekeyKernel<arrow::StringType, std::string>(ctx, key_field_list,
-                                                               result_schema, nulls_first, asc));
+      impl_.reset(new WindowSortOnekeyKernel<arrow::StringType, std::string>(
+          ctx, key_field_list, result_schema, nulls_first, asc));
     } else {
       switch (key_field_list[0]->type()->id()) {
-#define PROCESS(InType, BUILDER_TYPE, ARRAY_TYPE)                                                         \
-  case InType::type_id: {                                                     \
-    using CType = typename TypeTraits<InType>::CType;                  \
-    impl_.reset(new WindowSortOnekeyKernel<InType, CType>(ctx, key_field_list, result_schema, nulls_first, asc));  \
+#define PROCESS(InType, BUILDER_TYPE, ARRAY_TYPE)               \
+  case InType::type_id: {                                       \
+    using CType = typename TypeTraits<InType>::CType;           \
+    impl_.reset(new WindowSortOnekeyKernel<InType, CType>(      \
+        ctx, key_field_list, result_schema, nulls_first, asc)); \
   } break;
         PROCESS_SUPPORTED_TYPES_WINDOW_SORT(PROCESS)
 #undef PROCESS
diff --git a/native-sql-engine/cpp/src/codegen/code_generator_factory.h b/native-sql-engine/cpp/src/codegen/code_generator_factory.h
index c1cb5914a..5547c7f11 100644
--- a/native-sql-engine/cpp/src/codegen/code_generator_factory.h
+++ b/native-sql-engine/cpp/src/codegen/code_generator_factory.h
@@ -27,8 +27,7 @@
 namespace sparkcolumnarplugin {
 namespace codegen {
 arrow::Status CreateCodeGenerator(
-    arrow::MemoryPool* memory_pool,
-    std::shared_ptr<arrow::Schema> schema_ptr,
+    arrow::MemoryPool* memory_pool, std::shared_ptr<arrow::Schema> schema_ptr,
     std::vector<std::shared_ptr<::gandiva::Expression>> exprs_vector,
     std::vector<std::shared_ptr<arrow::Field>> ret_types,
     std::shared_ptr<CodeGenerator>* out, bool return_when_finish = false,
@@ -40,7 +39,8 @@ arrow::Status CreateCodeGenerator(
   switch (codegen_type) {
     case ARROW_COMPUTE:
       *out = std::make_shared<arrowcompute::ArrowComputeCodeGenerator>(
-          memory_pool, schema_ptr, exprs_vector, ret_types, return_when_finish, finish_exprs_vector);
+          memory_pool, schema_ptr, exprs_vector, ret_types, return_when_finish,
+          finish_exprs_vector);
       break;
     case GANDIVA:
       *out = std::make_shared<gandiva::GandivaCodeGenerator>(
diff --git a/native-sql-engine/cpp/src/codegen/common/hash_relation.h b/native-sql-engine/cpp/src/codegen/common/hash_relation.h
index 6cb5efebe..95b5b3ed1 100644
--- a/native-sql-engine/cpp/src/codegen/common/hash_relation.h
+++ b/native-sql-engine/cpp/src/codegen/common/hash_relation.h
@@ -27,6 +27,7 @@
 #include "precompile/unsafe_array.h"
 #include "third_party/murmurhash/murmurhash32.h"
 #include "third_party/row_wise_memory/hashMap.h"
+#include "utils/macros.h"
 
 using sparkcolumnarplugin::codegen::arrowcompute::extra::ArrayItemIndex;
 using sparkcolumnarplugin::precompile::enable_if_number;
@@ -142,6 +143,11 @@ class HashRelation {
   }
 
   arrow::Status InitHashTable(int init_key_capacity, int initial_bytesmap_capacity) {
+    if (init_key_capacity < 0 || initial_bytesmap_capacity < 0) {  // negative: overflow
+      THROW_NOT_OK(arrow::Status::Invalid(  // NOTE(review): presumably throws; confirm
+          "initialization size is overflowed, init_key_capacity is ", init_key_capacity,
+          ", initial_bytesmap_capacity is ", initial_bytesmap_capacity));
+    }
     hash_table_ = createUnsafeHashMap(ctx_->memory_pool(), init_key_capacity,
                                       initial_bytesmap_capacity, key_size_);
     return arrow::Status::OK();
@@ -161,9 +167,9 @@ class HashRelation {
     return arrow::Status::Invalid("Error minimizing hash table");
   }
 
-  arrow::Status AppendKeyColumn(
-      std::shared_ptr<arrow::Array> in,
-      const std::vector<std::shared_ptr<UnsafeArray>>& payloads) {
+  arrow::Status AppendKeyColumn(std::shared_ptr<arrow::Array> in,
+                                const std::vector<std::shared_ptr<UnsafeArray>>& payloads,
+                                bool semi = false) {
     if (hash_table_ == nullptr) {
       throw std::runtime_error("HashRelation Get failed, hash_table is null.");
     }
@@ -175,9 +181,14 @@ class HashRelation {
       for (auto payload_arr : payloads) {
         payload_arr->Append(i, &payload);
       }
-      // chendi: Since spark won't join rows contain null, we will skip null row.
+      // chendi: Since Spark won't join rows that contain null, we skip such
+      // rows.
       if (payload->isNullExists()) continue;
-      RETURN_NOT_OK(Insert(typed_array->GetView(i), payload, num_arrays_, i));
+      if (!semi) {
+        RETURN_NOT_OK(Insert(typed_array->GetView(i), payload, num_arrays_, i));
+      } else {
+        RETURN_NOT_OK(InsertSkipDup(typed_array->GetView(i), payload, num_arrays_, i));
+      }
     }
 
     num_arrays_++;
@@ -189,7 +200,8 @@ class HashRelation {
             typename std::enable_if_t<!std::is_same<KeyArrayType, StringArray>::value>* =
                 nullptr>
   arrow::Status AppendKeyColumn(std::shared_ptr<arrow::Array> in,
-                                std::shared_ptr<KeyArrayType> original_key) {
+                                std::shared_ptr<KeyArrayType> original_key,
+                                bool semi = false) {
     if (hash_table_ == nullptr) {
       throw std::runtime_error("HashRelation Get failed, hash_table is null.");
     }
@@ -205,8 +217,13 @@ class HashRelation {
         if (original_key->IsNull(i)) {
           RETURN_NOT_OK(InsertNull(num_arrays_, i));
         } else {
-          RETURN_NOT_OK(
-              Insert(typed_array->GetView(i), original_key->GetView(i), num_arrays_, i));
+          if (!semi) {
+            RETURN_NOT_OK(Insert(typed_array->GetView(i), original_key->GetView(i),
+                                 num_arrays_, i));
+          } else {
+            RETURN_NOT_OK(InsertSkipDup(typed_array->GetView(i), original_key->GetView(i),
+                                        num_arrays_, i));
+          }
         }
       }
     }
@@ -217,7 +234,8 @@ class HashRelation {
   }
 
   arrow::Status AppendKeyColumn(std::shared_ptr<arrow::Array> in,
-                                std::shared_ptr<StringArray> original_key) {
+                                std::shared_ptr<StringArray> original_key,
+                                bool semi = false) {
     if (hash_table_ == nullptr) {
       throw std::runtime_error("HashRelation Get failed, hash_table is null.");
     }
@@ -235,8 +253,13 @@ class HashRelation {
           RETURN_NOT_OK(InsertNull(num_arrays_, i));
         } else {
           auto str = original_key->GetString(i);
-          RETURN_NOT_OK(
-              Insert(typed_array->GetView(i), str.data(), str.size(), num_arrays_, i));
+          if (!semi) {
+            RETURN_NOT_OK(
+                Insert(typed_array->GetView(i), str.data(), str.size(), num_arrays_, i));
+          } else {
+            RETURN_NOT_OK(InsertSkipDup(typed_array->GetView(i), str.data(), str.size(),
+                                        num_arrays_, i));
+          }
         }
       }
     }
@@ -455,6 +478,38 @@ class HashRelation {
     return arrow::Status::OK();
   }
 
+  // AppendKeyColumn calls this instead of Insert when its `semi` flag is true.
+  arrow::Status InsertSkipDup(int32_t v, std::shared_ptr<UnsafeRow> payload,
+                              uint32_t array_id, uint32_t id) {
+    assert(hash_table_ != nullptr);
+    ArrayItemIndex item(array_id, id);
+    const auto ok =
+        appendNewKey(hash_table_, payload.get(), v, (char*)&item, sizeof(item));
+    if (ok) return arrow::Status::OK();
+    return arrow::Status::CapacityError("Insert to HashMap failed.");
+  }
+
+  // By-value payload overload; AppendKeyColumn picks it when its `semi` flag is true.
+  template <typename CType>
+  arrow::Status InsertSkipDup(int32_t v, CType payload, uint32_t array_id, uint32_t id) {
+    assert(hash_table_ != nullptr);
+    ArrayItemIndex item(array_id, id);
+    const auto ok = appendNewKey(hash_table_, payload, v, (char*)&item, sizeof(item));
+    if (ok) return arrow::Status::OK();
+    return arrow::Status::CapacityError("Insert to HashMap failed.");
+  }
+
+  // (pointer, length) payload overload; AppendKeyColumn picks it when `semi` is true.
+  arrow::Status InsertSkipDup(int32_t v, const char* payload, size_t payload_len,
+                              uint32_t array_id, uint32_t id) {
+    assert(hash_table_ != nullptr);
+    ArrayItemIndex item(array_id, id);
+    const auto ok = appendNewKey(hash_table_, payload, payload_len, v, (char*)&item,
+                                 sizeof(item));
+    if (ok) return arrow::Status::OK();
+    return arrow::Status::CapacityError("Insert to HashMap failed.");
+  }
+
   arrow::Status InsertNull(uint32_t array_id, uint32_t id) {
     // since vanilla spark doesn't support match null in join
     // we can directly retun to optimize
diff --git a/native-sql-engine/cpp/src/codegen/common/relation_column.h b/native-sql-engine/cpp/src/codegen/common/relation_column.h
index 8cafc657f..4317178dc 100644
--- a/native-sql-engine/cpp/src/codegen/common/relation_column.h
+++ b/native-sql-engine/cpp/src/codegen/common/relation_column.h
@@ -45,7 +45,8 @@ template <typename T, typename Enable = void>
 class TypedRelationColumn {};
 
 template <typename DataType>
-class TypedRelationColumn<DataType, enable_if_number_or_decimal<DataType>> : public RelationColumn {
+class TypedRelationColumn<DataType, enable_if_number_or_decimal<DataType>>
+    : public RelationColumn {
  public:
   using T = typename TypeTraits<DataType>::CType;
   TypedRelationColumn() {}
diff --git a/native-sql-engine/cpp/src/codegen/common/sort_relation.h b/native-sql-engine/cpp/src/codegen/common/sort_relation.h
index cb7a07784..f0caba42e 100644
--- a/native-sql-engine/cpp/src/codegen/common/sort_relation.h
+++ b/native-sql-engine/cpp/src/codegen/common/sort_relation.h
@@ -44,7 +44,7 @@ class SortRelation {
     sort_relation_key_list_ = sort_relation_key_list;
     sort_relation_payload_list_ = sort_relation_payload_list;
     int64_t buf_size = items_total_ * sizeof(ArrayItemIndexS);
-    auto maybe_buffer =  arrow::AllocateBuffer(buf_size, ctx_->memory_pool());
+    auto maybe_buffer = arrow::AllocateBuffer(buf_size, ctx_->memory_pool());
     indices_buf_ = *std::move(maybe_buffer);
     indices_begin_ = reinterpret_cast<ArrayItemIndexS*>(indices_buf_->mutable_data());
     uint64_t idx = 0;
diff --git a/native-sql-engine/cpp/src/codegen/compute_ext/code_generator.h b/native-sql-engine/cpp/src/codegen/compute_ext/code_generator.h
index db2b1690f..47c238ec4 100644
--- a/native-sql-engine/cpp/src/codegen/compute_ext/code_generator.h
+++ b/native-sql-engine/cpp/src/codegen/compute_ext/code_generator.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <arrow/type.h>
+
 #include "codegen/code_generator.h"
 
 namespace sparkcolumnarplugin {
diff --git a/native-sql-engine/cpp/src/codegen/expr_visitor.h b/native-sql-engine/cpp/src/codegen/expr_visitor.h
index a1b55277b..8cd6d4ed3 100644
--- a/native-sql-engine/cpp/src/codegen/expr_visitor.h
+++ b/native-sql-engine/cpp/src/codegen/expr_visitor.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <iostream>
+
 #include "codegen/code_generator.h"
 #include "codegen/common/visitor_base.h"
 
@@ -46,7 +47,8 @@ class ExprVisitor : public VisitorBase {
 
  private:
   // std::vector<std::string> ac{
-  //    "sum", "max", "min", "count", "getPrepareFunc", "splitArrayList", "encodeArray"};
+  //    "sum", "max", "min", "count", "getPrepareFunc", "splitArrayList",
+  //    "encodeArray"};
   std::vector<std::string> gdv{"add", "substract", "multiply", "divide"};
   std::vector<std::string> ce{};
   int codegen_type;
diff --git a/native-sql-engine/cpp/src/codegen/gandiva/code_generator.h b/native-sql-engine/cpp/src/codegen/gandiva/code_generator.h
index 222ed8f84..d5ae31b45 100644
--- a/native-sql-engine/cpp/src/codegen/gandiva/code_generator.h
+++ b/native-sql-engine/cpp/src/codegen/gandiva/code_generator.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <arrow/type.h>
+
 #include "codegen/code_generator.h"
 
 namespace sparkcolumnarplugin {
diff --git a/native-sql-engine/cpp/src/jni/jni_common.h b/native-sql-engine/cpp/src/jni/jni_common.h
index f31928de3..461f17788 100644
--- a/native-sql-engine/cpp/src/jni/jni_common.h
+++ b/native-sql-engine/cpp/src/jni/jni_common.h
@@ -206,10 +206,9 @@ arrow::Status MakeExprVector(JNIEnv* env, jbyteArray exprs_arr,
 
 jbyteArray ToSchemaByteArray(JNIEnv* env, std::shared_ptr<arrow::Schema> schema) {
   arrow::Status status;
-  //std::shared_ptr<arrow::Buffer> buffer;
+  // std::shared_ptr<arrow::Buffer> buffer;
   arrow::Result<std::shared_ptr<arrow::Buffer>> maybe_buffer;
-  maybe_buffer = arrow::ipc::SerializeSchema(*schema.get(),
-                                       arrow::default_memory_pool());
+  maybe_buffer = arrow::ipc::SerializeSchema(*schema.get(), arrow::default_memory_pool());
   if (!status.ok()) {
     std::string error_message =
         "Unable to convert schema to byte array, err is " + status.message();
@@ -225,13 +224,13 @@ jbyteArray ToSchemaByteArray(JNIEnv* env, std::shared_ptr<arrow::Schema> schema)
 arrow::Result<arrow::Compression::type> GetCompressionType(JNIEnv* env,
                                                            jstring codec_jstr) {
   auto codec_l = env->GetStringUTFChars(codec_jstr, JNI_FALSE);
-  
+
   std::string codec_u;
   std::transform(codec_l, codec_l + std::strlen(codec_l), std::back_inserter(codec_u),
                  ::tolower);
 
   ARROW_ASSIGN_OR_RAISE(auto compression_type,
-                        arrow::util::Codec::GetCompressionType(codec_u));              
+                        arrow::util::Codec::GetCompressionType(codec_u));
 
   if (compression_type == arrow::Compression::LZ4) {
     compression_type = arrow::Compression::LZ4_FRAME;
diff --git a/native-sql-engine/cpp/src/jni/jni_wrapper.cc b/native-sql-engine/cpp/src/jni/jni_wrapper.cc
index f0df32119..1a4dbbdc4 100644
--- a/native-sql-engine/cpp/src/jni/jni_wrapper.cc
+++ b/native-sql-engine/cpp/src/jni/jni_wrapper.cc
@@ -858,7 +858,8 @@ Java_com_intel_oap_vectorized_BatchIterator_nativeProcessAndCacheOne(
 
   if (!status.ok()) {
     std::string error_message =
-        "nativeProcessAndCache: ResultIterator process next failed with error msg " +
+        "nativeProcessAndCache: ResultIterator process next failed with error "
+        "msg " +
         status.ToString();
     env->ThrowNew(io_exception_class, error_message.c_str());
   }
@@ -910,7 +911,8 @@ Java_com_intel_oap_vectorized_BatchIterator_nativeProcessAndCacheOneWithSelectio
 
   if (!status.ok()) {
     std::string error_message =
-        "nativeProcessAndCache: ResultIterator process next failed with error msg " +
+        "nativeProcessAndCache: ResultIterator process next failed with error "
+        "msg " +
         status.ToString();
     env->ThrowNew(io_exception_class, error_message.c_str());
   }
@@ -957,12 +959,9 @@ Java_com_intel_oap_vectorized_AdaptorReferenceManager_nativeRelease(JNIEnv* env,
   buffer_holder_.Erase(id);
 }
 
-
-
 JNIEXPORT jlong JNICALL
 Java_com_intel_oap_vectorized_ShuffleSplitterJniWrapper_nativeSpill(
     JNIEnv* env, jobject obj, jlong splitter_id, jlong size, jboolean call_by_self) {
-
   auto splitter = shuffle_splitter_holder_.Lookup(splitter_id);
   if (!splitter) {
     std::string error_message = "Invalid splitter id " + std::to_string(splitter_id);
@@ -1105,8 +1104,9 @@ Java_com_intel_oap_vectorized_ShuffleSplitterJniWrapper_nativeMake(
   return shuffle_splitter_holder_.Insert(std::shared_ptr<Splitter>(splitter));
 }
 
-JNIEXPORT void JNICALL Java_com_intel_oap_vectorized_ShuffleSplitterJniWrapper_setCompressType(
-   JNIEnv* env, jobject, jlong splitter_id, jstring compression_type_jstr) {
+JNIEXPORT void JNICALL
+Java_com_intel_oap_vectorized_ShuffleSplitterJniWrapper_setCompressType(
+    JNIEnv* env, jobject, jlong splitter_id, jstring compression_type_jstr) {
   auto splitter = shuffle_splitter_holder_.Lookup(splitter_id);
   if (!splitter) {
     std::string error_message = "Invalid splitter id " + std::to_string(splitter_id);
@@ -1178,9 +1178,9 @@ JNIEXPORT jlong JNICALL Java_com_intel_oap_vectorized_ShuffleSplitterJniWrapper_
     if (!status.ok()) {
       // Throw IOException
       env->ThrowNew(io_exception_class,
-                  std::string("Native split: splitter split failed, error message is " +
-                              status.message())
-                      .c_str());
+                    std::string("Native split: splitter split failed, error message is " +
+                                status.message())
+                        .c_str());
     }
     return -1;
   }
@@ -1335,16 +1335,16 @@ JNIEXPORT void JNICALL Java_com_intel_oap_vectorized_ShuffleDecompressionJniWrap
   decompression_schema_holder_.Erase(schema_holder_id);
 }
 
-JNIEXPORT void JNICALL
-Java_com_intel_oap_tpc_MallocUtils_mallocTrim(JNIEnv* env, jobject obj) {
-//  malloc_stats_print(statsPrint, nullptr, nullptr);
+JNIEXPORT void JNICALL Java_com_intel_oap_tpc_MallocUtils_mallocTrim(JNIEnv* env,
+                                                                     jobject obj) {
+  //  malloc_stats_print(statsPrint, nullptr, nullptr);
   std::cout << "Calling malloc_trim... " << std::endl;
   malloc_trim(0);
 }
 
-JNIEXPORT void JNICALL
-Java_com_intel_oap_tpc_MallocUtils_mallocStats(JNIEnv* env, jobject obj) {
-//  malloc_stats_print(statsPrint, nullptr, nullptr);
+JNIEXPORT void JNICALL Java_com_intel_oap_tpc_MallocUtils_mallocStats(JNIEnv* env,
+                                                                      jobject obj) {
+  //  malloc_stats_print(statsPrint, nullptr, nullptr);
   std::cout << "Calling malloc_stats... " << std::endl;
   malloc_stats();
 }
diff --git a/native-sql-engine/cpp/src/precompile/builder.cc b/native-sql-engine/cpp/src/precompile/builder.cc
index 8d2fbc7b9..34cde0b0d 100644
--- a/native-sql-engine/cpp/src/precompile/builder.cc
+++ b/native-sql-engine/cpp/src/precompile/builder.cc
@@ -32,7 +32,7 @@ namespace precompile {
   };                                                                                    \
                                                                                         \
   TYPENAME::TYPENAME(arrow::MemoryPool* pool) { impl_ = std::make_shared<Impl>(pool); } \
-  arrow::Status TYPENAME::Append(CTYPE value) { return impl_->Append(value); }          \
+  arrow::Status TYPENAME::Append(const CTYPE& value) { return impl_->Append(value); }   \
   arrow::Status TYPENAME::AppendNull() { return impl_->AppendNull(); }                  \
   arrow::Status TYPENAME::Reserve(int64_t length) { return impl_->Reserve(length); }    \
   arrow::Status TYPENAME::AppendNulls(int64_t length) {                                 \
@@ -69,10 +69,10 @@ class StringBuilder::Impl : public arrow::StringBuilder {
 StringBuilder::StringBuilder(arrow::MemoryPool* pool) {
   impl_ = std::make_shared<Impl>(pool);
 }
-arrow::Status StringBuilder::Append(arrow::util::string_view value) {
+arrow::Status StringBuilder::Append(const arrow::util::string_view& value) {
   return impl_->Append(value);
 }
-arrow::Status StringBuilder::AppendString(std::string value) {
+arrow::Status StringBuilder::AppendString(const std::string& value) {
   return impl_->Append(arrow::util::string_view(value));
 }
 arrow::Status StringBuilder::AppendNull() { return impl_->AppendNull(); }
@@ -94,7 +94,7 @@ Decimal128Builder::Decimal128Builder(std::shared_ptr<arrow::DataType> type,
                                      arrow::MemoryPool* pool) {
   impl_ = std::make_shared<Impl>(type, pool);
 }
-arrow::Status Decimal128Builder::Append(arrow::Decimal128 value) {
+arrow::Status Decimal128Builder::Append(const arrow::Decimal128& value) {
   return impl_->Append(value);
 }
 arrow::Status Decimal128Builder::AppendNull() { return impl_->AppendNull(); }
diff --git a/native-sql-engine/cpp/src/precompile/builder.h b/native-sql-engine/cpp/src/precompile/builder.h
index 8efbcf21b..12c68d0b7 100644
--- a/native-sql-engine/cpp/src/precompile/builder.h
+++ b/native-sql-engine/cpp/src/precompile/builder.h
@@ -24,7 +24,7 @@ namespace precompile {
   class TYPENAME {                                            \
    public:                                                    \
     TYPENAME(arrow::MemoryPool* pool);                        \
-    arrow::Status Append(TYPE val);                           \
+    arrow::Status Append(const TYPE& val);                    \
     arrow::Status AppendNull();                               \
     arrow::Status Reserve(int64_t);                           \
     arrow::Status AppendNulls(int64_t);                       \
@@ -53,8 +53,8 @@ TYPED_BUILDER_DEFINE(Date64Builder, int64_t)
 class StringBuilder {
  public:
   StringBuilder(arrow::MemoryPool* pool);
-  arrow::Status Append(arrow::util::string_view val);
-  arrow::Status AppendString(std::string val);
+  arrow::Status Append(const arrow::util::string_view& val);
+  arrow::Status AppendString(const std::string& val);
   arrow::Status AppendNull();
   arrow::Status Finish(std::shared_ptr<arrow::Array>* out);
   arrow::Status Reset();
@@ -67,7 +67,7 @@ class StringBuilder {
 class Decimal128Builder {
  public:
   Decimal128Builder(std::shared_ptr<arrow::DataType> type, arrow::MemoryPool* pool);
-  arrow::Status Append(arrow::Decimal128 val);
+  arrow::Status Append(const arrow::Decimal128& val);
   arrow::Status AppendNull();
   arrow::Status Reserve(int64_t);
   arrow::Status AppendNulls(int64_t);
diff --git a/native-sql-engine/cpp/src/precompile/gandiva.h b/native-sql-engine/cpp/src/precompile/gandiva.h
index a337f656c..a97b654b6 100644
--- a/native-sql-engine/cpp/src/precompile/gandiva.h
+++ b/native-sql-engine/cpp/src/precompile/gandiva.h
@@ -71,12 +71,10 @@ arrow::Decimal128 castDECIMAL(arrow::Decimal128 in, int32_t original_precision,
   return arrow::Decimal128(out);
 }
 
-arrow::Decimal128 castDECIMALNullOnOverflow(arrow::Decimal128 in, 
+arrow::Decimal128 castDECIMALNullOnOverflow(arrow::Decimal128 in,
                                             int32_t original_precision,
-                                            int32_t original_scale, 
-                                            int32_t new_precision,
-                                            int32_t new_scale,
-                                            bool* overflow_) {
+                                            int32_t original_scale, int32_t new_precision,
+                                            int32_t new_scale, bool* overflow_) {
   bool overflow = false;
   gandiva::BasicDecimalScalar128 val(in, original_precision, original_scale);
   auto out = gandiva::decimalops::Convert(val, new_precision, new_scale, &overflow);
@@ -86,14 +84,12 @@ arrow::Decimal128 castDECIMALNullOnOverflow(arrow::Decimal128 in,
   return arrow::Decimal128(out);
 }
 
-arrow::Decimal128 add(arrow::Decimal128 left, int32_t left_precision,
-                      int32_t left_scale, arrow::Decimal128 right,
-                      int32_t right_precision, int32_t right_scale,
-                      int32_t out_precision, int32_t out_scale) {
+arrow::Decimal128 add(arrow::Decimal128 left, int32_t left_precision, int32_t left_scale,
+                      arrow::Decimal128 right, int32_t right_precision,
+                      int32_t right_scale, int32_t out_precision, int32_t out_scale) {
   gandiva::BasicDecimalScalar128 x(left, left_precision, left_scale);
   gandiva::BasicDecimalScalar128 y(right, right_precision, right_scale);
-  arrow::BasicDecimal128 out =
-      gandiva::decimalops::Add(x, y, out_precision, out_scale);
+  arrow::BasicDecimal128 out = gandiva::decimalops::Add(x, y, out_precision, out_scale);
   return arrow::Decimal128(out);
 }
 
@@ -111,8 +107,7 @@ arrow::Decimal128 subtract(arrow::Decimal128 left, int32_t left_precision,
 arrow::Decimal128 multiply(arrow::Decimal128 left, int32_t left_precision,
                            int32_t left_scale, arrow::Decimal128 right,
                            int32_t right_precision, int32_t right_scale,
-                           int32_t out_precision, int32_t out_scale, 
-                           bool* overflow_) {
+                           int32_t out_precision, int32_t out_scale, bool* overflow_) {
   gandiva::BasicDecimalScalar128 x(left, left_precision, left_scale);
   gandiva::BasicDecimalScalar128 y(right, right_precision, right_scale);
   bool overflow = false;
@@ -127,8 +122,7 @@ arrow::Decimal128 multiply(arrow::Decimal128 left, int32_t left_precision,
 arrow::Decimal128 divide(arrow::Decimal128 left, int32_t left_precision,
                          int32_t left_scale, arrow::Decimal128 right,
                          int32_t right_precision, int32_t right_scale,
-                         int32_t out_precision, int32_t out_scale,
-                         bool* overflow_) {
+                         int32_t out_precision, int32_t out_scale, bool* overflow_) {
   gandiva::BasicDecimalScalar128 x(left, left_precision, left_scale);
   gandiva::BasicDecimalScalar128 y(right, right_precision, right_scale);
   bool overflow = false;
@@ -141,7 +135,7 @@ arrow::Decimal128 divide(arrow::Decimal128 left, int32_t left_precision,
 }
 
 // A comparison with a NaN always returns false even when comparing with itself.
-// To get the same result as spark, we can regard NaN as big as Infinity when 
+// To get the same result as spark, we can regard NaN as big as Infinity when
 // doing comparison.
 bool less_than_with_nan(double left, double right) {
   bool left_is_nan = std::isnan(left);
@@ -207,3 +201,15 @@ bool equal_with_nan(double left, double right) {
   }
   return left == right;
 }
+
+arrow::Decimal128 round(arrow::Decimal128 in, int32_t original_precision,
+                        int32_t original_scale, bool* overflow_, int32_t res_scale = 2) {
+  bool overflow = false;  // scratch flag for Round(); *overflow_ is written only below
+  gandiva::BasicDecimalScalar128 val(in, original_precision, original_scale);
+  auto out = gandiva::decimalops::Round(val, original_precision, res_scale, res_scale,
+                                        &overflow);  // res_scale fills 3rd and 4th args
+  if (overflow) {
+    *overflow_ = true;  // raised only; never reset to false by this function
+  }
+  return arrow::Decimal128(out);
+}
diff --git a/native-sql-engine/cpp/src/precompile/gandiva_projector.cc b/native-sql-engine/cpp/src/precompile/gandiva_projector.cc
index 0b82a7c5e..73967c6be 100644
--- a/native-sql-engine/cpp/src/precompile/gandiva_projector.cc
+++ b/native-sql-engine/cpp/src/precompile/gandiva_projector.cc
@@ -17,7 +17,6 @@
 #include "precompile/gandiva_projector.h"
 
 #include <arrow/array.h>
-
 #include <arrow/record_batch.h>
 #include <arrow/type_fwd.h>
 #include <gandiva/projector.h>
diff --git a/native-sql-engine/cpp/src/precompile/sort.cc b/native-sql-engine/cpp/src/precompile/sort.cc
index de182017a..a9d1515ca 100644
--- a/native-sql-engine/cpp/src/precompile/sort.cc
+++ b/native-sql-engine/cpp/src/precompile/sort.cc
@@ -37,7 +37,8 @@ TYPED_ASC_SORT_IMPL(std::string)
 
 void sort_desc(ArrayItemIndex* begin, ArrayItemIndex* end,
                std::function<bool(ArrayItemIndex, ArrayItemIndex)> comp) {
-  // std::sort(begin, end, *comp.target<bool (*)(ArrayItemIndex, ArrayItemIndex)>());
+  // std::sort(begin, end, *comp.target<bool (*)(ArrayItemIndex,
+  // ArrayItemIndex)>());
   std::sort(begin, end, comp);
 }
 }  // namespace precompile
diff --git a/native-sql-engine/cpp/src/proto/protobuf_utils.h b/native-sql-engine/cpp/src/proto/protobuf_utils.h
index 170e1bfb8..39bd07b66 100644
--- a/native-sql-engine/cpp/src/proto/protobuf_utils.h
+++ b/native-sql-engine/cpp/src/proto/protobuf_utils.h
@@ -15,6 +15,12 @@
  * limitations under the License.
  */
 
+#include <arrow/builder.h>
+#include <arrow/record_batch.h>
+#include <arrow/type.h>
+#include <gandiva/arrow.h>
+#include <gandiva/gandiva_aliases.h>
+#include <gandiva/tree_expr_builder.h>
 
 #include <map>
 #include <memory>
@@ -24,14 +30,6 @@
 #include <utility>
 #include <vector>
 
-#include <arrow/builder.h>
-#include <arrow/record_batch.h>
-#include <arrow/type.h>
-
-#include <gandiva/arrow.h>
-#include <gandiva/gandiva_aliases.h>
-#include <gandiva/tree_expr_builder.h>
-
 #include "Exprs.pb.h"
 
 using gandiva::ConditionPtr;
diff --git a/native-sql-engine/cpp/src/shuffle/splitter.cc b/native-sql-engine/cpp/src/shuffle/splitter.cc
index d53bd40cb..b8bc0eba7 100644
--- a/native-sql-engine/cpp/src/shuffle/splitter.cc
+++ b/native-sql-engine/cpp/src/shuffle/splitter.cc
@@ -15,8 +15,7 @@
  * limitations under the License.
  */
 
-#include <memory>
-#include <utility>
+#include "shuffle/splitter.h"
 
 #include <arrow/ipc/writer.h>
 #include <arrow/memory_pool.h>
@@ -25,7 +24,9 @@
 #include <gandiva/projector.h>
 #include <gandiva/tree_expr_builder.h>
 
-#include "shuffle/splitter.h"
+#include <memory>
+#include <utility>
+
 #include "shuffle/utils.h"
 #include "utils/macros.h"
 
@@ -317,8 +318,8 @@ arrow::Status Splitter::Init() {
   sub_dir_selection_.assign(configured_dirs_.size(), 0);
 
   // Both data_file and shuffle_index_file should be set through jni.
-  // For test purpose, Create a temporary subdirectory in the system temporary dir with
-  // prefix "columnar-shuffle"
+  // For testing purposes, create a temporary subdirectory in the system temporary
+  // dir with prefix "columnar-shuffle"
   if (options_.data_file.length() == 0) {
     ARROW_ASSIGN_OR_RAISE(options_.data_file, CreateTempShuffleFile(configured_dirs_[0]));
   }
@@ -329,14 +330,14 @@ arrow::Status Splitter::Init() {
 
   if (options_.compression_type == arrow::Compression::FASTPFOR) {
     ARROW_ASSIGN_OR_RAISE(ipc_write_options.codec,
-    arrow::util::Codec::CreateInt32(arrow::Compression::FASTPFOR));
-    
+                          arrow::util::Codec::CreateInt32(arrow::Compression::FASTPFOR));
+
   } else if (options_.compression_type == arrow::Compression::LZ4_FRAME) {
     ARROW_ASSIGN_OR_RAISE(ipc_write_options.codec,
-    arrow::util::Codec::Create(arrow::Compression::LZ4_FRAME));
+                          arrow::util::Codec::Create(arrow::Compression::LZ4_FRAME));
   } else {
-    ARROW_ASSIGN_OR_RAISE(ipc_write_options.codec,
-    arrow::util::Codec::CreateInt32(arrow::Compression::UNCOMPRESSED) );
+    ARROW_ASSIGN_OR_RAISE(ipc_write_options.codec, arrow::util::Codec::CreateInt32(
+                                                       arrow::Compression::UNCOMPRESSED));
   }
 
   return arrow::Status::OK();
@@ -344,8 +345,8 @@ arrow::Status Splitter::Init() {
 
 int64_t Splitter::CompressedSize(const arrow::RecordBatch& rb) {
   auto payload = std::make_shared<arrow::ipc::IpcPayload>();
-  auto result = arrow::ipc::GetRecordBatchPayload(
-                           rb, options_.ipc_write_options, payload.get());
+  auto result =
+      arrow::ipc::GetRecordBatchPayload(rb, options_.ipc_write_options, payload.get());
   if (result.ok()) {
     return payload.get()->body_length;
   } else {
@@ -355,16 +356,17 @@ int64_t Splitter::CompressedSize(const arrow::RecordBatch& rb) {
 }
 
 arrow::Status Splitter::SetCompressType(arrow::Compression::type compressed_type) {
-   if (compressed_type == arrow::Compression::FASTPFOR) {
+  if (compressed_type == arrow::Compression::FASTPFOR) {
     ARROW_ASSIGN_OR_RAISE(options_.ipc_write_options.codec,
-    arrow::util::Codec::CreateInt32(arrow::Compression::FASTPFOR));
-    
+                          arrow::util::Codec::CreateInt32(arrow::Compression::FASTPFOR));
+
   } else if (compressed_type == arrow::Compression::LZ4_FRAME) {
     ARROW_ASSIGN_OR_RAISE(options_.ipc_write_options.codec,
-    arrow::util::Codec::Create(arrow::Compression::LZ4_FRAME));
+                          arrow::util::Codec::Create(arrow::Compression::LZ4_FRAME));
   } else {
-    ARROW_ASSIGN_OR_RAISE(options_.ipc_write_options.codec,
-    arrow::util::Codec::CreateInt32(arrow::Compression::UNCOMPRESSED) );
+    ARROW_ASSIGN_OR_RAISE(
+        options_.ipc_write_options.codec,
+        arrow::util::Codec::CreateInt32(arrow::Compression::UNCOMPRESSED));
   }
   return arrow::Status::OK();
 }
@@ -612,7 +614,8 @@ arrow::Status Splitter::SpillFixedSize(int64_t size, int64_t* actual) {
   while (current_spilled < size && try_count < 5) {
     try_count++;
     int64_t single_call_spilled;
-    ARROW_ASSIGN_OR_RAISE(int32_t spilled_partition_id, SpillLargestPartition(&single_call_spilled))
+    ARROW_ASSIGN_OR_RAISE(int32_t spilled_partition_id,
+                          SpillLargestPartition(&single_call_spilled))
     if (spilled_partition_id == -1) {
       break;
     }
@@ -655,8 +658,8 @@ arrow::Result<int32_t> Splitter::SpillLargestPartition(int64_t* size) {
 }
 
 arrow::Status Splitter::DoSplit(const arrow::RecordBatch& rb) {
-  // for the first input record batch, scan binary arrays and large binary arrays to get
-  // their empirical sizes
+  // for the first input record batch, scan binary arrays and large binary
+  // arrays to get their empirical sizes
   if (!empirical_size_calculated_) {
     auto num_rows = rb.num_rows();
     for (int i = 0; i < binary_array_idx_.size(); ++i) {
@@ -1070,15 +1073,14 @@ std::string Splitter::NextSpilledFileDir() {
   return spilled_file_dir;
 }
 
-arrow::Result<std::shared_ptr<arrow::ipc::IpcPayload>>
-Splitter::GetSchemaPayload() {
+arrow::Result<std::shared_ptr<arrow::ipc::IpcPayload>> Splitter::GetSchemaPayload() {
   if (schema_payload_ != nullptr) {
     return schema_payload_;
   }
   schema_payload_ = std::make_shared<arrow::ipc::IpcPayload>();
   arrow::ipc::DictionaryFieldMapper dict_file_mapper;  // unused
-  RETURN_NOT_OK(arrow::ipc::GetSchemaPayload(
-      *schema_, options_.ipc_write_options, dict_file_mapper, schema_payload_.get()));
+  RETURN_NOT_OK(arrow::ipc::GetSchemaPayload(*schema_, options_.ipc_write_options,
+                                             dict_file_mapper, schema_payload_.get()));
   return schema_payload_;
 }
 
@@ -1147,8 +1149,8 @@ arrow::Status HashSplitter::CreateProjector(
       default:
         hash = gandiva::TreeExprBuilder::MakeFunction("hash32", {expr->root(), hash},
                                                       arrow::int32());
-        /*return arrow::Status::NotImplemented("HashSplitter::CreateProjector doesn't
-           support type ", expr->result()->type()->ToString());*/
+        /*return arrow::Status::NotImplemented("HashSplitter::CreateProjector
+           doesn't support type ", expr->result()->type()->ToString());*/
     }
   }
   auto hash_expr =
diff --git a/native-sql-engine/cpp/src/shuffle/splitter.h b/native-sql-engine/cpp/src/shuffle/splitter.h
index 4edecbf19..5907dda42 100644
--- a/native-sql-engine/cpp/src/shuffle/splitter.h
+++ b/native-sql-engine/cpp/src/shuffle/splitter.h
@@ -17,9 +17,6 @@
 
 #pragma once
 
-#include <random>
-#include <utility>
-
 #include <arrow/filesystem/filesystem.h>
 #include <arrow/filesystem/localfs.h>
 #include <arrow/io/api.h>
@@ -28,6 +25,9 @@
 #include <gandiva/gandiva_aliases.h>
 #include <gandiva/projector.h>
 
+#include <random>
+#include <utility>
+
 #include "shuffle/type.h"
 #include "shuffle/utils.h"
 
@@ -48,19 +48,21 @@ class Splitter {
   virtual const std::shared_ptr<arrow::Schema>& input_schema() const { return schema_; }
 
   /**
-   * Split input record batch into partition buffers according to the computed partition
-   * id. The largest partition buffer will be spilled if memory allocation failure occurs.
+   * Split input record batch into partition buffers according to the computed
+   * partition id. The largest partition buffer will be spilled if memory
+   * allocation failure occurs.
    */
   virtual arrow::Status Split(const arrow::RecordBatch&);
-  
+
   /**
    * Compute the compresse size of record batch.
    */
   virtual int64_t CompressedSize(const arrow::RecordBatch&);
 
   /**
-   * For each partition, merge spilled file into shuffle data file and write any cached
-   * record batch to shuffle data file. Close all resources and collect metrics.
+   * For each partition, merge spilled file into shuffle data file and write any
+   * cached record batch to shuffle data file. Close all resources and collect
+   * metrics.
    */
   arrow::Status Stop();
 
@@ -130,18 +132,18 @@ class Splitter {
       const std::shared_ptr<ArrayType>& src_arr,
       const std::vector<std::shared_ptr<BuilderType>>& dst_builders, int64_t num_rows);
 
-  // Cache the partition buffer/builder as compressed record batch. If reset buffers, the
-  // partition buffer/builder will be set to nullptr.
-  // Two cases for caching the partition buffers as record batch:
+  // Cache the partition buffer/builder as a compressed record batch. If
+  // reset_buffers is true, the partition buffer/builder will be set to nullptr.
+  // Two cases for caching the partition buffers as a record batch:
   // 1. Split record batch. It first calculate whether the partition
-  // buffer can hold all data according to partition id. If not, call this method and
-  // allocate new buffers. Spill will happen if OOM.
+  // buffer can hold all data according to partition id. If not, call this
+  // method and allocate new buffers. Spill will happen if OOM.
   // 2. Stop the splitter. The record batch will be written to disk immediately.
   arrow::Status CacheRecordBatch(int32_t partition_id, bool reset_buffers);
 
   // Allocate new partition buffer/builder.
-  // If successful, will point partition buffer/builder to new ones, otherwise will
-  // spill the largest partition and retry
+  // If successful, will point partition buffer/builder to new ones, otherwise
+  // will spill the largest partition and retry
   arrow::Status AllocateNew(int32_t partition_id, int32_t new_size);
 
   // Allocate new partition buffer/builder. May return OOM status.
diff --git a/native-sql-engine/cpp/src/shuffle/type.h b/native-sql-engine/cpp/src/shuffle/type.h
index 96740edfe..8a879bc9c 100644
--- a/native-sql-engine/cpp/src/shuffle/type.h
+++ b/native-sql-engine/cpp/src/shuffle/type.h
@@ -18,9 +18,10 @@
 #pragma once
 
 #include <arrow/extension_type.h>
+#include <arrow/ipc/options.h>
 #include <arrow/type.h>
 #include <arrow/util/logging.h>
-#include <arrow/ipc/options.h>
+
 #include <deque>
 
 namespace sparkcolumnarplugin {
@@ -47,8 +48,7 @@ struct SplitOptions {
 
   arrow::MemoryPool* memory_pool = arrow::default_memory_pool();
 
-  arrow::ipc::IpcWriteOptions ipc_write_options =
-      arrow::ipc::IpcWriteOptions::Defaults();
+  arrow::ipc::IpcWriteOptions ipc_write_options = arrow::ipc::IpcWriteOptions::Defaults();
 
   static SplitOptions Defaults();
 };
diff --git a/native-sql-engine/cpp/src/shuffle/utils.h b/native-sql-engine/cpp/src/shuffle/utils.h
index 89ea45213..55adb4fc0 100644
--- a/native-sql-engine/cpp/src/shuffle/utils.h
+++ b/native-sql-engine/cpp/src/shuffle/utils.h
@@ -17,19 +17,19 @@
 
 #pragma once
 
-#include <chrono>
-#include <iomanip>
-#include <iostream>
-#include <sstream>
-#include <thread>
-
 #include <arrow/filesystem/filesystem.h>
 #include <arrow/filesystem/localfs.h>
 #include <arrow/filesystem/path_util.h>
 #include <arrow/ipc/writer.h>
 #include <arrow/util/io_util.h>
+
 #include <boost/uuid/uuid_generators.hpp>
 #include <boost/uuid/uuid_io.hpp>
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <thread>
 
 namespace sparkcolumnarplugin {
 namespace shuffle {
diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate.cc
index ecb01a38c..28e85509b 100644
--- a/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate.cc
+++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_aggregate.cc
@@ -109,17 +109,23 @@ TEST(TestArrowCompute, AggregateTest) {
   std::vector<std::string> input_data_string = {
       "[1, 2, 3, 4, 5, null, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
       "[1, 2, 3, 4, 5, null, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]"};
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
   std::vector<std::string> input_data_2_string = {
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
-      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, 46]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]"};
+      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, "
+      "46]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]"};
   MakeInputBatch(input_data_2_string, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
 
@@ -221,18 +227,24 @@ TEST(TestArrowCompute, GroupByAggregateTest) {
   std::vector<std::string> input_data = {
       "[1, 2, 3, 4, 5, null, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
       "[1, 2, 3, 4, 5, 5, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]"};
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]"};
   MakeInputBatch(input_data, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
 
   std::vector<std::string> input_data_2 = {
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
-      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, 46]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]"};
+      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, "
+      "46]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]"};
   MakeInputBatch(input_data_2, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
 
@@ -243,8 +255,10 @@ TEST(TestArrowCompute, GroupByAggregateTest) {
       "[1, 2, 3, 4, 5, null, 6, 7, 8, 9, 10]",
       "[25, 18, 12, 64, 125, 5, 150, 63, 32, 144, 360]",
       "[1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10]",
-      "[16.4, 6.5, 5, 5.875, 5.48, null, 6.1, 6.61905, 3.0625, 2.63889, 2.06667]",
-      "[8.49255, 6.93137, 7.6489, 13.5708, 17.4668, 1.41421, 8.52779, 6.23633, 5.58903, "
+      "[16.4, 6.5, 5, 5.875, 5.48, null, 6.1, 6.61905, 3.0625, 2.63889, "
+      "2.06667]",
+      "[8.49255, 6.93137, 7.6489, 13.5708, 17.4668, 1.41421, 8.52779, 6.23633, "
+      "5.58903, "
       "12.535, 24.3544]"};
   auto res_sch = arrow::schema(ret_types);
   MakeInputBatch(expected_result_string, res_sch, &expected_result);
@@ -416,9 +430,12 @@ TEST(TestArrowCompute, GroupByTwoAggregateTest) {
   std::vector<std::string> input_data = {
       "[1, 2, 3, 4, 5, null, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
       "[1, 2, 3, 4, 5, 5, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
       R"(["BJ", "SH", "HZ", "BH", "NY", "SH", "BH", "BJ", "SH", "SH", "BJ", "BJ", "BJ", "BH", "BH", "HZ", "NY", "NY", "NY", "NY"])"};
   MakeInputBatch(input_data, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
@@ -426,9 +443,12 @@ TEST(TestArrowCompute, GroupByTwoAggregateTest) {
   std::vector<std::string> input_data_2 = {
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
-      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, 46]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
+      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, "
+      "46]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
       R"(["BJ", "SH", "TK", "SH", "PH", "PH", "SH", "BJ", "SH", "SH", "BJ", "BJ", "BJ", "SH", "SH", "TK", "PH", "PH", "PH", "PH"])"};
   MakeInputBatch(input_data_2, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
@@ -443,8 +463,10 @@ TEST(TestArrowCompute, GroupByTwoAggregateTest) {
       "[5, 3, 2, 4, 5, 1, 5, 3, 2, 4, 6]",
       "[1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10]",
       "[1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10]",
-      "[16.4, 6.5, 5, 5.875, 5.48, null, 6.1, 6.61905, 3.0625, 2.63889, 2.06667]",
-      "[8.49255, 6.93137, 7.6489, 13.5708, 17.4668, 1.41421, 8.52779, 6.23633, 5.58903, "
+      "[16.4, 6.5, 5, 5.875, 5.48, null, 6.1, 6.61905, 3.0625, 2.63889, "
+      "2.06667]",
+      "[8.49255, 6.93137, 7.6489, 13.5708, 17.4668, 1.41421, 8.52779, 6.23633, "
+      "5.58903, "
       "12.535, 24.3544]"};
   auto res_sch = arrow::schema(ret_types);
   MakeInputBatch(expected_result_string, res_sch, &expected_result);
@@ -532,7 +554,8 @@ TEST(TestArrowCompute, GroupByHashAggregateWithCaseWhenTest) {
   std::vector<std::string> input_data_2 = {
       R"(["CD", "DL", "NY", "LA", "AU", "AU", "LA", "CD", "DL", "DL", "CD", "CD", "CD",
 "LA", "LA", "NY", "AU", "AU", "AU", "AU"])",
-      "[36, 49, 64, 81, 100, 100, 81, 36, 49, 49, 36, 36, 36, 81, 81, 64, 100, 100, 100, "
+      "[36, 49, 64, 81, 100, 100, 81, 36, 49, 49, 36, 36, 36, 81, 81, 64, 100, "
+      "100, 100, "
       "100]"};
   MakeInputBatch(input_data_2, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
@@ -540,7 +563,8 @@ TEST(TestArrowCompute, GroupByHashAggregateWithCaseWhenTest) {
   std::vector<std::string> input_data_3 = {
       R"(["BJ", "SH", "SZ", "NY", "WH", "WH", "AU", "BJ", "SH", "DL", "CD", "CD", "BJ",
 "LA", "HZ", "LA", "WH", "NY", "WH", "WH"])",
-      "[1, 4, 9, 64, 25, 25, 100, 1, 4, 49, 36, 36, 1, 81, 16, 81, 25, 64, 25, 25]"};
+      "[1, 4, 9, 64, 25, 25, 100, 1, 4, 49, 36, 36, 1, 81, 16, 81, 25, 64, 25, "
+      "25]"};
   MakeInputBatch(input_data_3, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
 
diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_check_condition.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_check_condition.cc
index d85742a91..c4140e2c7 100644
--- a/native-sql-engine/cpp/src/tests/arrow_compute_test_check_condition.cc
+++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_check_condition.cc
@@ -133,19 +133,23 @@ std::string ProduceCodes(std::string condition_check_str, std::string var_define
   ss << "class TESTCONDITION {" << std::endl;
   ss << " public:" << std::endl;
   if (!with_projection) {
-    ss << "  TESTCONDITION(std::vector<std::shared_ptr<arrow::RecordBatch>> table_0,"
+    ss << "  TESTCONDITION(std::vector<std::shared_ptr<arrow::RecordBatch>> "
+          "table_0,"
        << std::endl;
     ss << "                std::shared_ptr<arrow::RecordBatch> table_1) {" << std::endl;
     ss << var_prepare << std::endl;
     ss << "}" << std::endl;
   } else {
-    ss << "  TESTCONDITION(std::vector<std::shared_ptr<arrow::RecordBatch>> table_0,"
+    ss << "  TESTCONDITION(std::vector<std::shared_ptr<arrow::RecordBatch>> "
+          "table_0,"
        << std::endl;
     ss << "                std::shared_ptr<arrow::RecordBatch> table_1," << std::endl
-       << "                std::vector<std::vector<std::shared_ptr<arrow::Array>>> "
+       << "                "
+          "std::vector<std::vector<std::shared_ptr<arrow::Array>>> "
           "projected_0,"
        << std::endl
-       << "                std::vector<std::shared_ptr<arrow::Array>> projected_1) {"
+       << "                std::vector<std::shared_ptr<arrow::Array>> "
+          "projected_1) {"
        << std::endl;
     ss << var_prepare << std::endl;
     ss << "}" << std::endl;
@@ -163,11 +167,13 @@ std::string ProduceCodes(std::string condition_check_str, std::string var_define
     ss << "extern \"C\" void DoTest(" << std::endl
        << "    std::vector<std::shared_ptr<arrow::RecordBatch>> table_0," << std::endl
        << "    std::shared_ptr<arrow::RecordBatch> table_1," << std::endl
-       << "    std::vector<std::vector<std::shared_ptr<arrow::Array>>> projected_0,"
+       << "    std::vector<std::vector<std::shared_ptr<arrow::Array>>> "
+          "projected_0,"
        << std::endl
        << "    std::vector<std::shared_ptr<arrow::Array>> projected_1," << std::endl
        << "    std::vector<bool>* res) {" << std::endl;
-    ss << "  auto test = TESTCONDITION(table_0, table_1, projected_0, projected_1);"
+    ss << "  auto test = TESTCONDITION(table_0, table_1, projected_0, "
+          "projected_1);"
        << std::endl;
   }
   ss << "  int x = 0;" << std::endl;
diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_join_smj.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_join_smj.cc
index d89a3eab7..1cdcd735c 100644
--- a/native-sql-engine/cpp/src/tests/arrow_compute_test_join_smj.cc
+++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_join_smj.cc
@@ -65,8 +65,8 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingInnerJoin) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),
-      schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(
+      ctx.memory_pool(), schema_table_0, {probeArrays_expr},
       {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_probe, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -171,8 +171,8 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingOuterJoin) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),
-      schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(
+      ctx.memory_pool(), schema_table_0, {probeArrays_expr},
       {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_probe, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -207,7 +207,8 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingOuterJoin) {
   std::vector<std::shared_ptr<RecordBatch>> expected_table;
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[0, 0, null, 2, 2, 3, null, 5, null]", "[null, null, null, 2, 2, 3, null, 5, null]",
+      "[0, 0, null, 2, 2, 3, null, 5, null]",
+      "[null, null, null, 2, 2, 3, null, 5, null]",
       "[null, null, null, 2, 2, 3, null, 5, null]", "[0, 0, 1, 2, 2, 3, 4, 5, 6]",
       "[null, null, 1, 2, 2, 3, 4, 5, 6]"};
   auto res_sch = arrow::schema({f_res, f_res, f_res, f_res, f_res});
@@ -281,7 +282,7 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingAntiJoin) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {probeArrays_expr},
                                     {table1_f0, table1_f1}, &expr_probe, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -383,7 +384,7 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingSemiJoin) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {probeArrays_expr},
                                     {table1_f0, table1_f1}, &expr_probe, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -490,7 +491,7 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingSemiJoinWithCondition) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {probeArrays_expr},
                                     {table1_f0, table1_f1}, &expr_probe, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -597,8 +598,8 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingInnerJoinWithCondition) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),
-      schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(
+      ctx.memory_pool(), schema_table_0, {probeArrays_expr},
       {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_probe, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -707,8 +708,8 @@ TEST(TestArrowComputeMergeJoin, JoinTestWithTwoKeysUsingInnerJoin) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),
-      schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(
+      ctx.memory_pool(), schema_table_0, {probeArrays_expr},
       {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_probe, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -817,7 +818,7 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingAntiJoinWithCondition) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {probeArrays_expr},
                                     {table1_f0, table1_f1}, &expr_probe, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -921,8 +922,8 @@ TEST(TestArrowComputeMergeJoin, JoinTestUsingExistenceJoin) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(),
-      schema_table_0, {probeArrays_expr},
+  ASSERT_NOT_OK(CreateCodeGenerator(
+      ctx.memory_pool(), schema_table_0, {probeArrays_expr},
       {table1_f0, field("table1_exists", boolean()), table1_f1}, &expr_probe, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_join_wocg.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_join_wocg.cc
index 5324a6f21..9218d3d18 100644
--- a/native-sql-engine/cpp/src/tests/arrow_compute_test_join_wocg.cc
+++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_join_wocg.cc
@@ -49,40 +49,35 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestProjectKeyInnerJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_right_project_key = TreeExprBuilder::MakeFunction(
       "castBIGINT", {TreeExprBuilder::MakeField(table1_f0)}, uint64());
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction("codegen_right_key_schema",
-                                                   {n_right_project_key}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_add = TreeExprBuilder::MakeFunction(
-      "add",
-      {TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table1_f1)},
-      uint64());
-  auto n_condition = TreeExprBuilder::MakeFunction(
-      "greater_than", {n_add, TreeExprBuilder::MakeField(table0_f2)}, boolean());
-  auto n_hash_config = TreeExprBuilder::MakeFunction(
-      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)}, uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedProbeArraysInner",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config, n_condition},
-      uint32());
-  auto n_standalone =
-      TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key =
+TreeExprBuilder::MakeFunction("codegen_right_key_schema", {n_right_project_key},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_add =
+TreeExprBuilder::MakeFunction( "add", {TreeExprBuilder::MakeField(table0_f1),
+TreeExprBuilder::MakeField(table1_f1)}, uint64()); auto n_condition =
+TreeExprBuilder::MakeFunction( "greater_than", {n_add,
+TreeExprBuilder::MakeField(table0_f2)}, boolean()); auto n_hash_config =
+TreeExprBuilder::MakeFunction( "build_keys_config_node",
+{TreeExprBuilder::MakeLiteral((int)0)}, uint32()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedProbeArraysInner", {n_left, n_right,
+n_left_key, n_right_key, n_result, n_hash_config, n_condition}, uint32()); auto
+n_standalone = TreeExprBuilder::MakeFunction("standalone", {n_probeArrays},
+uint32());
 
   auto probeArrays_expr = TreeExprBuilder::MakeExpression(n_standalone, f_res);
 
@@ -93,8 +88,8 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestProjectKeyInnerJoin) {
 
   std::shared_ptr<CodeGenerator> expr_probe;
   arrow::compute::ExecContext ctx;
-  auto result = CreateCodeGenerator(ctx.memory_pool(), schema_table_1, {probeArrays_expr},
-                                    {table1_f1, table0_f2}, &expr_probe, true);
+  auto result = CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
+{probeArrays_expr}, {table1_f1, table0_f2}, &expr_probe, true);
 }
 
 TEST(TestArrowComputeWSCG, JoinWOCGTestStringInnerJoin) {
@@ -108,34 +103,32 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestStringInnerJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key_func = TreeExprBuilder::MakeFunction(
-      "upper", {TreeExprBuilder::MakeField(table1_f0)}, utf8());
-  auto n_right_key = TreeExprBuilder::MakeFunction("codegen_right_key_schema",
-                                                   {n_right_key_func}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f0),
-       TreeExprBuilder::MakeField(table1_f1)},
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key_func = TreeExprBuilder::MakeFunction( "upper",
+{TreeExprBuilder::MakeField(table1_f0)}, utf8()); auto n_right_key =
+TreeExprBuilder::MakeFunction("codegen_right_key_schema", {n_right_key_func},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
       uint32());
   auto n_hash_config = TreeExprBuilder::MakeFunction(
-      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)}, uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
+      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)},
+uint32()); auto n_probeArrays = TreeExprBuilder::MakeFunction(
       "conditionedProbeArraysInner",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config}, uint32());
-  auto n_standalone =
-      TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
+      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config},
+uint32()); auto n_standalone = TreeExprBuilder::MakeFunction("standalone",
+{n_probeArrays}, uint32());
 
   auto probeArrays_expr = TreeExprBuilder::MakeExpression(n_standalone, f_res);
 
@@ -146,16 +139,16 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestStringInnerJoin) {
 
   auto n_hash_kernel =
       TreeExprBuilder::MakeFunction("HashRelation", {n_left_key}, uint32());
-  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel}, uint32());
-  auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash, f_res);
-  std::shared_ptr<CodeGenerator> expr_build;
-  arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
-                                    {hashRelation_expr}, {}, &expr_build, true));
-  std::shared_ptr<CodeGenerator> expr_probe;
+  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel},
+uint32()); auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash,
+f_res); std::shared_ptr<CodeGenerator> expr_build; arrow::compute::ExecContext
+ctx; ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+                                    {hashRelation_expr}, {}, &expr_build,
+true)); std::shared_ptr<CodeGenerator> expr_probe;
   ASSERT_NOT_OK(CreateCodeGenerator(
       ctx.memory_pool(), schema_table_1, {probeArrays_expr},
-      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_probe, true));
+      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_probe,
+true));
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -164,15 +157,15 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestStringInnerJoin) {
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_0;
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_1;
 
-  std::vector<std::string> input_data_string = {R"(["BJ", "SH", "HZ", "BH", "NY", "SH"])",
-                                                R"(["A", "A", "C", "D", "C", "D"])",
+  std::vector<std::string> input_data_string = {R"(["BJ", "SH", "HZ", "BH",
+"NY", "SH"])", R"(["A", "A", "C", "D", "C", "D"])",
                                                 "[10, 3, 1, 2, 13, 11]"};
   MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
   input_data_string = {R"(["TK", "SH", "PH", "NJ", "NB", "SZ"])",
-                       R"(["F", "F", "A", "B", "D", "C"])", "[6, 12, 5, 8, 16, 110]"};
-  MakeInputBatch(input_data_string, schema_table_0, &input_batch);
+                       R"(["F", "F", "A", "B", "D", "C"])", "[6, 12, 5, 8, 16,
+110]"}; MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {
@@ -193,12 +186,12 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestStringInnerJoin) {
       R"(["SH", "SH", "SH", "SZ", "BJ", "NY", "HZ"])",
       R"(["A", "D", "F", "C", "A", "C", "C"])", "[3, 11, 12, 110, 10, 13, 1]",
       R"(["sh", "sh", "sh", "sz", "bj", "ny", "hz"])", "[1, 1, 1, 2, 3, 5, 6]"};
-  auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0, table1_f1});
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+  auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0,
+table1_f1}); MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
-  expected_result_string = {R"(["PH", "NJ", "SZ"])", R"(["A", "B", "C"])", "[5, 8, 110]",
-                            R"(["ph", "nj", "sz"])", "[7, null, 12]"};
+  expected_result_string = {R"(["PH", "NJ", "SZ"])", R"(["A", "B", "C"])", "[5,
+8, 110]", R"(["ph", "nj", "sz"])", "[7, null, 12]"};
   MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
@@ -241,37 +234,34 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestTwoStringInnerJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
       "codegen_left_key_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1)},
-      uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f0),
-       TreeExprBuilder::MakeField(table1_f1)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1)}, uint32()); auto n_right_key =
+TreeExprBuilder::MakeFunction( "codegen_right_key_schema",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_result =
+TreeExprBuilder::MakeFunction( "result", {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
       uint32());
   auto n_hash_config = TreeExprBuilder::MakeFunction(
-      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)}, uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
+      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)},
+uint32()); auto n_probeArrays = TreeExprBuilder::MakeFunction(
       "conditionedProbeArraysInner",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config}, uint32());
-  auto n_standalone =
-      TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
-  auto probeArrays_expr = TreeExprBuilder::MakeExpression(n_standalone, f_res);
+      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config},
+uint32()); auto n_standalone = TreeExprBuilder::MakeFunction("standalone",
+{n_probeArrays}, uint32()); auto probeArrays_expr =
+TreeExprBuilder::MakeExpression(n_standalone, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -280,16 +270,16 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestTwoStringInnerJoin) {
 
   auto n_hash_kernel =
       TreeExprBuilder::MakeFunction("HashRelation", {n_left_key}, uint32());
-  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel}, uint32());
-  auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash, f_res);
-  std::shared_ptr<CodeGenerator> expr_build;
-  arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
-                                    {hashRelation_expr}, {}, &expr_build, true));
-  std::shared_ptr<CodeGenerator> expr_probe;
+  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel},
+uint32()); auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash,
+f_res); std::shared_ptr<CodeGenerator> expr_build; arrow::compute::ExecContext
+ctx; ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+                                    {hashRelation_expr}, {}, &expr_build,
+true)); std::shared_ptr<CodeGenerator> expr_probe;
   ASSERT_NOT_OK(CreateCodeGenerator(
       ctx.memory_pool(), schema_table_1, {probeArrays_expr},
-      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_probe, true));
+      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_probe,
+true));
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -308,8 +298,8 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestTwoStringInnerJoin) {
   MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
-  std::vector<std::string> input_data_2_string = {R"(["a", "b", "c", "d", "e", "f"])",
-                                                  R"(["A", "B", "C", "D", "F", "F"])"};
+  std::vector<std::string> input_data_2_string = {R"(["a", "b", "c", "d", "e",
+"f"])", R"(["A", "B", "C", "D", "F", "F"])"};
   MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
   table_1.push_back(input_batch);
 
@@ -328,8 +318,8 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestTwoStringInnerJoin) {
   MakeInputBatch(expected_result_string, schema_table, &expected_result);
   expected_table.push_back(expected_result);
 
-  expected_result_string = {R"(["j", "l", "n"])", R"(["J", "L", "N"])", "[8, 10, 12]",
-                            R"(["j", "l", "n"])", R"(["J", "L", "N"])"};
+  expected_result_string = {R"(["j", "l", "n"])", R"(["J", "L", "N"])", "[8, 10,
+12]", R"(["j", "l", "n"])", R"(["J", "L", "N"])"};
   MakeInputBatch(expected_result_string, schema_table, &expected_result);
   expected_table.push_back(expected_result);
 
@@ -372,29 +362,27 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestOuterJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_hash_config = TreeExprBuilder::MakeFunction(
-      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)}, uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedProbeArraysOuter",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_hash_config =
+TreeExprBuilder::MakeFunction( "build_keys_config_node",
+{TreeExprBuilder::MakeLiteral((int)0)}, uint32()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedProbeArraysOuter", {n_left, n_right,
+n_left_key, n_right_key, n_result, n_hash_config}, uint32());
 
   auto n_standalone =
       TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
@@ -407,16 +395,15 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestOuterJoin) {
 
   auto n_hash_kernel =
       TreeExprBuilder::MakeFunction("HashRelation", {n_left_key}, uint32());
-  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel}, uint32());
-  auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash, f_res);
-  std::shared_ptr<CodeGenerator> expr_build;
-  arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
-                                    {hashRelation_expr}, {}, &expr_build, true));
-  std::shared_ptr<CodeGenerator> expr_probe;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1, {probeArrays_expr},
-                                    {table0_f0, table0_f1, table0_f2, table1_f1},
-                                    &expr_probe, true));
+  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel},
+uint32()); auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash,
+f_res); std::shared_ptr<CodeGenerator> expr_build; arrow::compute::ExecContext
+ctx; ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+                                    {hashRelation_expr}, {}, &expr_build,
+true)); std::shared_ptr<CodeGenerator> expr_probe;
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
+{probeArrays_expr}, {table0_f0, table0_f1, table0_f2, table1_f1}, &expr_probe,
+true));
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -500,30 +487,27 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestAntiJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_hash_config = TreeExprBuilder::MakeFunction(
-      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)}, uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedProbeArraysAnti",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config}, uint32());
-  auto n_standalone =
-      TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_hash_config =
+TreeExprBuilder::MakeFunction( "build_keys_config_node",
+{TreeExprBuilder::MakeLiteral((int)0)}, uint32()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedProbeArraysAnti", {n_left, n_right,
+n_left_key, n_right_key, n_result, n_hash_config}, uint32()); auto n_standalone
+= TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
 
   auto probeArrays_expr = TreeExprBuilder::MakeExpression(n_standalone, f_res);
 
@@ -534,15 +518,14 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestAntiJoin) {
 
   auto n_hash_kernel =
       TreeExprBuilder::MakeFunction("HashRelation", {n_left_key}, uint32());
-  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel}, uint32());
-  auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash, f_res);
-  std::shared_ptr<CodeGenerator> expr_build;
-  arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
-                                    {hashRelation_expr}, {}, &expr_build, true));
-  std::shared_ptr<CodeGenerator> expr_probe;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1, {probeArrays_expr},
-                                    {table1_f0, table1_f1}, &expr_probe, true));
+  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel},
+uint32()); auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash,
+f_res); std::shared_ptr<CodeGenerator> expr_build; arrow::compute::ExecContext
+ctx; ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+                                    {hashRelation_expr}, {}, &expr_build,
+true)); std::shared_ptr<CodeGenerator> expr_probe;
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
+{probeArrays_expr}, {table1_f0, table1_f1}, &expr_probe, true));
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -622,31 +605,28 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestSemiJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_hash_config = TreeExprBuilder::MakeFunction(
-      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)}, uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedProbeArraysSemi",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config}, uint32());
-  auto n_standalone =
-      TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
-  auto probeArrays_expr = TreeExprBuilder::MakeExpression(n_standalone, f_res);
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_hash_config =
+TreeExprBuilder::MakeFunction( "build_keys_config_node",
+{TreeExprBuilder::MakeLiteral((int)0)}, uint32()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedProbeArraysSemi", {n_left, n_right,
+n_left_key, n_right_key, n_result, n_hash_config}, uint32()); auto n_standalone
+= TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32()); auto
+probeArrays_expr = TreeExprBuilder::MakeExpression(n_standalone, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -654,15 +634,14 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestSemiJoin) {
 
   auto n_hash_kernel =
       TreeExprBuilder::MakeFunction("HashRelation", {n_left_key}, uint32());
-  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel}, uint32());
-  auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash, f_res);
-  std::shared_ptr<CodeGenerator> expr_build;
-  arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
-                                    {hashRelation_expr}, {}, &expr_build, true));
-  std::shared_ptr<CodeGenerator> expr_probe;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1, {probeArrays_expr},
-                                    {table1_f0, table1_f1}, &expr_probe, true));
+  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel},
+uint32()); auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash,
+f_res); std::shared_ptr<CodeGenerator> expr_build; arrow::compute::ExecContext
+ctx; ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+                                    {hashRelation_expr}, {}, &expr_build,
+true)); std::shared_ptr<CodeGenerator> expr_probe;
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
+{probeArrays_expr}, {table1_f0, table1_f1}, &expr_probe, true));
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -682,8 +661,8 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestSemiJoin) {
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {"[1, 3, 4, 5, 6]",
-                                                  R"(["BJ", "TY", "NY", "SH", "HZ"])"};
-  MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
+                                                  R"(["BJ", "TY", "NY", "SH",
+"HZ"])"}; MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
   table_1.push_back(input_batch);
 
   input_data_2_string = {"[7, 8, 9, 10, 11, 12]",
@@ -697,8 +676,8 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestSemiJoin) {
   std::vector<std::shared_ptr<RecordBatch>> expected_table;
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {"[1, 3, 5, 6]",
-                                                     R"(["BJ", "TY", "SH", "HZ"])"};
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+                                                     R"(["BJ", "TY", "SH",
+"HZ"])"}; MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
   expected_result_string = {"[8, 10, 12]", R"(["NY", "IT", "TL"])"};
@@ -746,31 +725,30 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestExistenceJoin) {
   auto f_exist = field("res", arrow::boolean());
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(f_exist),
-       TreeExprBuilder::MakeField(table1_f1)},
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(f_exist), TreeExprBuilder::MakeField(table1_f1)},
       uint32());
   auto n_hash_config = TreeExprBuilder::MakeFunction(
-      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)}, uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
+      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)},
+uint32()); auto n_probeArrays = TreeExprBuilder::MakeFunction(
       "conditionedProbeArraysExistence",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config}, uint32());
-  auto n_standalone =
-      TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
-  auto probeArrays_expr = TreeExprBuilder::MakeExpression(n_standalone, f_res);
+      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config},
+uint32()); auto n_standalone = TreeExprBuilder::MakeFunction("standalone",
+{n_probeArrays}, uint32()); auto probeArrays_expr =
+TreeExprBuilder::MakeExpression(n_standalone, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -779,15 +757,14 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestExistenceJoin) {
 
   auto n_hash_kernel =
       TreeExprBuilder::MakeFunction("HashRelation", {n_left_key}, uint32());
-  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel}, uint32());
-  auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash, f_res);
-  std::shared_ptr<CodeGenerator> expr_build;
-  arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
-                                    {hashRelation_expr}, {}, &expr_build, true));
-  std::shared_ptr<CodeGenerator> expr_probe;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1, {probeArrays_expr},
-                                    {table1_f0, f_exist, table1_f1}, &expr_probe, true));
+  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel},
+uint32()); auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash,
+f_res); std::shared_ptr<CodeGenerator> expr_build; arrow::compute::ExecContext
+ctx; ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+                                    {hashRelation_expr}, {}, &expr_build,
+true)); std::shared_ptr<CodeGenerator> expr_probe;
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
+{probeArrays_expr}, {table1_f0, f_exist, table1_f1}, &expr_probe, true));
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -873,31 +850,30 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestExistenceJoin2) {
   auto f_exist = field("res", arrow::boolean());
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1),
-       TreeExprBuilder::MakeField(f_exist)},
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1), TreeExprBuilder::MakeField(f_exist)},
       uint32());
   auto n_hash_config = TreeExprBuilder::MakeFunction(
-      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)}, uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
+      "build_keys_config_node", {TreeExprBuilder::MakeLiteral((int)0)},
+uint32()); auto n_probeArrays = TreeExprBuilder::MakeFunction(
       "conditionedProbeArraysExistence",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config}, uint32());
-  auto n_standalone =
-      TreeExprBuilder::MakeFunction("standalone", {n_probeArrays}, uint32());
-  auto probeArrays_expr = TreeExprBuilder::MakeExpression(n_standalone, f_res);
+      {n_left, n_right, n_left_key, n_right_key, n_result, n_hash_config},
+uint32()); auto n_standalone = TreeExprBuilder::MakeFunction("standalone",
+{n_probeArrays}, uint32()); auto probeArrays_expr =
+TreeExprBuilder::MakeExpression(n_standalone, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -906,15 +882,14 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestExistenceJoin2) {
 
   auto n_hash_kernel =
       TreeExprBuilder::MakeFunction("HashRelation", {n_left_key}, uint32());
-  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel}, uint32());
-  auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash, f_res);
-  std::shared_ptr<CodeGenerator> expr_build;
-  arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
-                                    {hashRelation_expr}, {}, &expr_build, true));
-  std::shared_ptr<CodeGenerator> expr_probe;
-  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1, {probeArrays_expr},
-                                    {table1_f0, table1_f1, f_exist}, &expr_probe, true));
+  auto n_hash = TreeExprBuilder::MakeFunction("standalone", {n_hash_kernel},
+uint32()); auto hashRelation_expr = TreeExprBuilder::MakeExpression(n_hash,
+f_res); std::shared_ptr<CodeGenerator> expr_build; arrow::compute::ExecContext
+ctx; ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+                                    {hashRelation_expr}, {}, &expr_build,
+true)); std::shared_ptr<CodeGenerator> expr_probe;
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
+{probeArrays_expr}, {table1_f0, table1_f1, f_exist}, &expr_probe, true));
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
 
@@ -1740,7 +1715,7 @@ TEST(TestArrowComputeWSCG, JoinWOCGTestSemiJoinType2WithUInt64) {
 }
 
 TEST(TestArrowComputeWSCG, JoinWOCGTestInnerJoinType2WithUInt16) {
-  return; // TODO() fix this test
+  return;  // TODO() fix this test
   ////////////////////// prepare expr_vector ///////////////////////
   auto table0_f0 = field("table0_f0", uint16());
   auto table0_f1 = field("table0_f1", uint32());
diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_precompile.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_precompile.cc
index c74f19822..1287e3322 100644
--- a/native-sql-engine/cpp/src/tests/arrow_compute_test_precompile.cc
+++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_precompile.cc
@@ -55,11 +55,10 @@ TEST(TestArrowCompute, ArithmeticDecimalTest) {
   auto res = castDECIMAL(left, left_precision, left_scale, out_precision, out_scale);
   ASSERT_EQ(res, arrow::Decimal128("32342423.0128750000"));
   bool overflow = false;
-  res = castDECIMALNullOnOverflow(left, left_precision, left_scale, out_precision, 
+  res = castDECIMALNullOnOverflow(left, left_precision, left_scale, out_precision,
                                   out_scale, &overflow);
   ASSERT_EQ(res, arrow::Decimal128("32342423.0128750000"));
-  res = add(left, left_precision, left_scale, right, right_precision, right_scale,
-            17, 9);
+  res = add(left, left_precision, left_scale, right, right_precision, right_scale, 17, 9);
   ASSERT_EQ(res, arrow::Decimal128("32344770.025749535"));
   res = subtract(left, left_precision, left_scale, right, right_precision, right_scale,
                  17, 9);
@@ -70,6 +69,10 @@ TEST(TestArrowCompute, ArithmeticDecimalTest) {
   res = divide(left, left_precision, left_scale, right, right_precision, right_scale,
                out_precision, out_scale, &overflow);
   ASSERT_EQ(res, arrow::Decimal128("13780.2495094037"));
+  res = round(left, left_precision, left_scale, &overflow, 4);
+  ASSERT_EQ(res, arrow::Decimal128("32342423.0129"));
+  res = arrow::Decimal128("-32342423.012875").Abs();
+  ASSERT_EQ(res, left);
 }
 
 TEST(TestArrowCompute, ArithmeticComparisonTest) {
diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_sort.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_sort.cc
index 478f6229f..7e6470d26 100644
--- a/native-sql-engine/cpp/src/tests/arrow_compute_test_sort.cc
+++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_sort.cc
@@ -35,28 +35,23 @@ TEST(TestArrowComputeSort, SortTestInPlaceNullsFirstAsc) {
   auto arg_0 = TreeExprBuilder::MakeField(f0);
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  
+
   auto f_res = field("res", uint32());
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());    
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0});
@@ -64,8 +59,8 @@ TEST(TestArrowComputeSort, SortTestInPlaceNullsFirstAsc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -92,10 +87,12 @@ TEST(TestArrowComputeSort, SortTestInPlaceNullsFirstAsc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[null, null, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 21, "
+      "[null, null, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, "
+      "20, 21, "
       "23, 30, 32, 33, 35, 37, 42, 43, 50, 52, 59, 64, NaN, NaN, NaN]"};
   MakeInputBatch(expected_result_string, sch, &expected_result);
 
@@ -123,28 +120,23 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsLastAsc) {
   auto arg_0 = TreeExprBuilder::MakeField(f0);
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  
+
   auto f_res = field("res", uint32());
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0});
@@ -152,8 +144,8 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsLastAsc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -180,11 +172,13 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsLastAsc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       "[1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 21, "
-      "23, 30, 32, 33, 35, 37, 42, 43, 50, 52, 59, 64, NaN, NaN, NaN, null, null]"};
+      "23, 30, 32, 33, 35, 37, 42, 43, 50, 52, 59, 64, NaN, NaN, NaN, null, "
+      "null]"};
   MakeInputBatch(expected_result_string, sch, &expected_result);
 
   for (auto batch : input_batch_list) {
@@ -211,28 +205,24 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsFirstDesc) {
   auto arg_0 = TreeExprBuilder::MakeField(f0);
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  
+
   auto f_res = field("res", uint32());
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {false_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir =
+      TreeExprBuilder::MakeFunction("sort_directions", {false_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0});
@@ -240,8 +230,8 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsFirstDesc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -268,10 +258,12 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsFirstDesc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[null, null, NaN, NaN, NaN, 64, 59, 52, 50, 43, 42, 37, 35, 33, 32, 30, 23, "
+      "[null, null, NaN, NaN, NaN, 64, 59, 52, 50, 43, 42, 37, 35, 33, 32, 30, "
+      "23, "
       "21, 20, 19, 18, 17, 15, 13, 12, 11, 10, 9, 8, 7, 6, 4, 3, 2, 1]"};
   MakeInputBatch(expected_result_string, sch, &expected_result);
 
@@ -299,28 +291,24 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsLastDesc) {
   auto arg_0 = TreeExprBuilder::MakeField(f0);
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  
+
   auto f_res = field("res", uint32());
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {false_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir =
+      TreeExprBuilder::MakeFunction("sort_directions", {false_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0});
@@ -328,8 +316,8 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsLastDesc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -356,7 +344,8 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsLastDesc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       "[NaN, NaN, NaN, 64, 59, 52, 50, 43, 42, 37, 35, 33, 32, 30, 23, 21, 20, "
@@ -387,28 +376,23 @@ TEST(TestArrowComputeSort, SortTestInplaceAsc) {
   auto arg_0 = TreeExprBuilder::MakeField(f0);
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  
+
   auto f_res = field("res", uint32());
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0});
@@ -416,8 +400,8 @@ TEST(TestArrowComputeSort, SortTestInplaceAsc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -444,10 +428,12 @@ TEST(TestArrowComputeSort, SortTestInplaceAsc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19, 20, 21, 22, 23, "
+      "[1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 17, 18, 19, 20, 21, 22, "
+      "23, "
       "30, 32, 33, 35, 37, 41, 42, 43, 45, 50, 52, 59, 64, NaN, NaN]"};
   MakeInputBatch(expected_result_string, sch, &expected_result);
 
@@ -475,28 +461,24 @@ TEST(TestArrowComputeSort, SortTestInplaceDesc) {
   auto arg_0 = TreeExprBuilder::MakeField(f0);
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  
+
   auto f_res = field("res", uint32());
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {false_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir =
+      TreeExprBuilder::MakeFunction("sort_directions", {false_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0});
@@ -504,8 +486,8 @@ TEST(TestArrowComputeSort, SortTestInplaceDesc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -532,7 +514,8 @@ TEST(TestArrowComputeSort, SortTestInplaceDesc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       "[NaN, NaN, 64, 59, 52, 50, 45, 43, 42, 41, 37, 35, 33, 32, 30, 23, "
@@ -567,10 +550,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstAsc) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
   auto n_dir = TreeExprBuilder::MakeFunction(
       "sort_directions", {TreeExprBuilder::MakeLiteral(true)}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
@@ -580,10 +561,10 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstAsc) {
   auto do_codegen = TreeExprBuilder::MakeFunction(
       "codegen", {TreeExprBuilder::MakeLiteral(false)}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1});
@@ -591,8 +572,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstAsc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
@@ -622,7 +603,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstAsc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       "[null, null, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 13, 15, 17, 18, 19, 21, "
@@ -661,10 +643,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastAsc) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
   auto n_dir = TreeExprBuilder::MakeFunction(
       "sort_directions", {TreeExprBuilder::MakeLiteral(true)}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
@@ -672,12 +652,12 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastAsc) {
   auto NaN_check = TreeExprBuilder::MakeFunction(
       "NaN_check", {TreeExprBuilder::MakeLiteral(true)}, uint32());
   auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {TreeExprBuilder::MakeLiteral(false)}, uint32());    
+      "codegen", {TreeExprBuilder::MakeLiteral(false)}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1});
@@ -685,8 +665,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastAsc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -717,13 +697,15 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastAsc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       "[1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 13, 15, 17, 18, 19, 21, 22, 23, 30, "
       "32, 33, 35, 37, 41, 42, 43, 50, 52, 59, 64, NaN, NaN, NaN, null, null]",
       "[2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 14, 16, 18, 19, 20, 22, 23, 24,"
-      "31, 33, 34, 36, 38, 42, 43, 44, 51, null, 60, 65, 21, null, 13, 34, 67]"};
+      "31, 33, 34, 36, 38, 42, 43, 44, 51, null, 60, 65, 21, null, 13, 34, "
+      "67]"};
   MakeInputBatch(expected_result_string, sch, &expected_result);
 
   for (auto batch : input_batch_list) {
@@ -754,10 +736,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstDesc) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
   auto n_dir = TreeExprBuilder::MakeFunction(
       "sort_directions", {TreeExprBuilder::MakeLiteral(false)}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
@@ -767,10 +747,10 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstDesc) {
   auto do_codegen = TreeExprBuilder::MakeFunction(
       "codegen", {TreeExprBuilder::MakeLiteral(false)}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1});
@@ -778,8 +758,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstDesc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -810,12 +790,15 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstDesc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[null ,null , NaN, NaN, NaN, 64 ,59 ,52 ,50 ,43 ,42 ,41 ,37 ,35 ,33 ,32 ,30 "
+      "[null ,null , NaN, NaN, NaN, 64 ,59 ,52 ,50 ,43 ,42 ,41 ,37 ,35 ,33 ,32 "
+      ",30 "
       ",23 ,22 ,21 ,19 ,18 ,17 ,15 ,13 , 11 ,10 ,9 ,8 ,7 ,6 ,4 ,3 ,2 ,1]",
-      "[34 ,67 ,13, null, 21, 65 ,60 ,null ,51 ,44 ,43 ,42 ,38 ,36 ,34 ,33 ,31 ,24 "
+      "[34 ,67 ,13, null, 21, 65 ,60 ,null ,51 ,44 ,43 ,42 ,38 ,36 ,34 ,33 ,31 "
+      ",24 "
       ",23 ,22 , 20 ,19 ,18 ,16 ,14 ,12 ,11 ,10 ,9 ,8 ,7 ,5 ,4 ,3 ,2]"};
   MakeInputBatch(expected_result_string, sch, &expected_result);
 
@@ -847,10 +830,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastDesc) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
   auto n_dir = TreeExprBuilder::MakeFunction(
       "sort_directions", {TreeExprBuilder::MakeLiteral(false)}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
@@ -860,10 +841,10 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastDesc) {
   auto do_codegen = TreeExprBuilder::MakeFunction(
       "codegen", {TreeExprBuilder::MakeLiteral(false)}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1});
@@ -871,8 +852,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastDesc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
@@ -903,12 +884,15 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastDesc) {
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[NaN, NaN, NaN, 64 ,59 ,52 ,50 ,43 ,42 ,41 ,37 ,35 ,33 ,32 ,30 ,23 ,22 ,21 "
+      "[NaN, NaN, NaN, 64 ,59 ,52 ,50 ,43 ,42 ,41 ,37 ,35 ,33 ,32 ,30 ,23 ,22 "
+      ",21 "
       ",19 ,18 ,17 ,15 ,13 , 11 ,10 ,9 ,8 ,7 ,6 ,4 ,3 ,2 ,1, null, null]",
-      "[13, null, 21, 65 ,60 ,null ,51 ,44 ,43 ,42 ,38 ,36 ,34 ,33 ,31 ,24 ,23 ,22 "
+      "[13, null, 21, 65 ,60 ,null ,51 ,44 ,43 ,42 ,38 ,36 ,34 ,33 ,31 ,24 ,23 "
+      ",22 "
       ", 20 ,19 ,18 ,16 ,14 ,12 ,11 ,10 ,9 ,8 ,7 ,5 ,4 ,3 ,2, 34, 67]"};
   MakeInputBatch(expected_result_string, sch, &expected_result);
 
@@ -938,28 +922,24 @@ TEST(TestArrowComputeSort, SortTestOnekeyBooleanDesc) {
   auto arg_1 = TreeExprBuilder::MakeField(f1);
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  
+
   auto f_res = field("res", uint32());
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {false_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir =
+      TreeExprBuilder::MakeFunction("sort_directions", {false_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1});
@@ -967,46 +947,50 @@ TEST(TestArrowComputeSort, SortTestOnekeyBooleanDesc) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
   std::shared_ptr<ResultIteratorBase> sort_result_iterator_base;
 
-  std::vector<std::string> input_data_string = {"[true, false, false, false, true, true, false]", 
-                                                "[1, 2, 3, 4, 5, 6, 7]"};
+  std::vector<std::string> input_data_string = {
+      "[true, false, false, false, true, true, false]", "[1, 2, 3, 4, 5, 6, 7]"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_2 = {"[true, true, false, false, false, true, false]",
-                                                  "[4, 2, 6, 0, 1, 4, 12]"};
+  std::vector<std::string> input_data_string_2 = {
+      "[true, true, false, false, false, true, false]", "[4, 2, 6, 0, 1, 4, 12]"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_3 = {"[true, true, false, false, false, true, false]",
-                                                  "[6, 12, 16, 10, 11, 41, 2]"};
+  std::vector<std::string> input_data_string_3 = {
+      "[true, true, false, false, false, true, false]", "[6, 12, 16, 10, 11, 41, 2]"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_4 = {"[true, true, false, false, false, true, false]",
-                                                  "[8, 22, 45, 12, 78, 12, 32]"};
+  std::vector<std::string> input_data_string_4 = {
+      "[true, true, false, false, false, true, false]", "[8, 22, 45, 12, 78, 12, 32]"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_5 = {"[true, true, false, false, false, true, false]",
-                                                  "[18, 5, 6, 78, 11, 2, 12]"};
+  std::vector<std::string> input_data_string_5 = {
+      "[true, true, false, false, false, true, false]", "[18, 5, 6, 78, 11, 2, 12]"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, "
-      "false, false, false, false, false, false, false, false, false, false, false, false, false, "
+      "[true, true, true, true, true, true, true, true, true, true, true, "
+      "true, true, true, true, "
+      "false, false, false, false, false, false, false, false, false, false, "
+      "false, false, false, "
       "false, false, false, false, false, false, false]",
-      "[18, 22, 8, 41, 1, 12, 12, 6, 4, 5, 2, 4, 6, 5, 2, 6, 10, 32, 78, 78, 11, 12, 12, 45, 2, "
+      "[18, 22, 8, 41, 1, 12, 12, 6, 4, 5, 2, 4, 6, 5, 2, 6, 10, 32, 78, 78, "
+      "11, 12, 12, 45, 2, "
       "11, 16, 12, 1, 0, 6, 7, 4, 3, 2]"};
   MakeInputBatch(expected_result_string, sch, &expected_result);
 
@@ -1041,23 +1025,18 @@ TEST(TestArrowComputeSort, SortTestOneKeyStr) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1});
@@ -1065,38 +1044,38 @@ TEST(TestArrowComputeSort, SortTestOneKeyStr) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
   std::shared_ptr<ResultIteratorBase> sort_result_iterator_base;
   std::vector<std::string> input_data_string = {
-    R"(["b", "q", "s", "t", null, null, "a"])",
-    R"(["a", "c", "e", "f", "g", null, "h"])"};
+      R"(["b", "q", "s", "t", null, null, "a"])",
+      R"(["a", "c", "e", "f", "g", null, "h"])"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_2 = {
-    R"([null, "f", "q", "d", "r", null, "g"])",
-    R"(["a", "c", "e", "f", null, "j", "h"])"};
+      R"([null, "f", "q", "d", "r", null, "g"])",
+      R"(["a", "c", "e", "f", null, "j", "h"])"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_3 = {
-    R"(["p", "q", "o", "e", null, null, "l"])",
-    R"(["a", "c", "e", "f", "g","j", null])"};
+      R"(["p", "q", "o", "e", null, null, "l"])",
+      R"(["a", "c", "e", "f", "g","j", null])"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_4 = {
-    R"(["q", "w", "z", "x", "y", null, "u"])",
-    R"(["a", "c", "e", "f", "g","j", "h"])"};
+      R"(["q", "w", "z", "x", "y", null, "u"])", R"(["a", "c", "e", "f", "g","j", "h"])"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_5 = {
-    R"(["a", "c", "b", "d", null, null, null])",
-    R"(["a", null, "e", "f", "g","j", "h"])"};
+      R"(["a", "c", "b", "d", null, null, null])",
+      R"(["a", null, "e", "f", "g","j", "h"])"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////// calculation //////////////////////////////
+  // The expected result batch is declared next.
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       R"(["a","a","b","b","c","d","d","e","f","g","l","o","p","q","q","q","q","r","s","t","u","w","x","y","z",null,null,null,null,null,null,null,null,null,null])",
@@ -1130,25 +1109,20 @@ TEST(TestArrowComputeSort, SortTestOneKeyWithProjection) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_projection = TreeExprBuilder::MakeFunction(
-      "upper", {arg_0}, utf8());
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {n_projection}, uint32());    
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_projection = TreeExprBuilder::MakeFunction("upper", {arg_0}, utf8());
+  auto n_key_func =
+      TreeExprBuilder::MakeFunction("key_function", {n_projection}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1});
@@ -1156,38 +1130,38 @@ TEST(TestArrowComputeSort, SortTestOneKeyWithProjection) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
   std::shared_ptr<ResultIteratorBase> sort_result_iterator_base;
   std::vector<std::string> input_data_string = {
-    R"(["B", "q", "s", "T", null, null, "a"])",
-    R"(["a", "c", "e", "f", "g", null, "h"])"};
+      R"(["B", "q", "s", "T", null, null, "a"])",
+      R"(["a", "c", "e", "f", "g", null, "h"])"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_2 = {
-    R"([null, "F", "Q", "d", "r", null, "g"])",
-    R"(["a", "c", "e", "f", null, "j", "h"])"};
+      R"([null, "F", "Q", "d", "r", null, "g"])",
+      R"(["a", "c", "e", "f", null, "j", "h"])"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_3 = {
-    R"(["p", "q", "o", "E", null, null, "l"])",
-    R"(["a", "c", "e", "f", "g","j", null])"};
+      R"(["p", "q", "o", "E", null, null, "l"])",
+      R"(["a", "c", "e", "f", "g","j", null])"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_4 = {
-    R"(["q", "W", "Z", "x", "y", null, "u"])",
-    R"(["a", "c", "e", "f", "g","j", "h"])"};
+      R"(["q", "W", "Z", "x", "y", null, "u"])", R"(["a", "c", "e", "f", "g","j", "h"])"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_5 = {
-    R"(["a", "C", "b", "D", null, null, null])",
-    R"(["a", null, "e", "f", "g","j", "h"])"};
+      R"(["a", "C", "b", "D", null, null, null])",
+      R"(["a", null, "e", "f", "g","j", "h"])"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////// calculation //////////////////////////////
+  // The expected result batch is declared next.
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       R"(["a","a","b","B","C","D","d","E","F","g","l","o","p","q","q","Q","q","r","s","T","u","W","x","y","Z",null,null,null,null,null,null,null,null,null,null])",
@@ -1223,23 +1197,21 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysNaN) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0, arg_1, arg_2}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0, arg_1, arg_2}, uint32());
+  auto n_key_func =
+      TreeExprBuilder::MakeFunction("key_function", {arg_0, arg_1, arg_2}, uint32());
+  auto n_key_field =
+      TreeExprBuilder::MakeFunction("key_field", {arg_0, arg_1, arg_2}, uint32());
   auto n_dir = TreeExprBuilder::MakeFunction(
       "sort_directions", {true_literal, false_literal, true_literal}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
       "sort_nulls_order", {false_literal, true_literal, true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {true_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {true_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1, f2, f3});
@@ -1247,55 +1219,53 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysNaN) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
 
-  std::vector<std::string> input_data_string = {"[8, NaN, 4, 50, 52, 32, 11]",
-                                                R"([null, "a", "a", "b", "b","b", "b"])",
-                                                "[11, NaN, 5, 51, null, 33, 12]",
-                                                "[1, 3, 5, 10, null, 13, 2]"};
+  std::vector<std::string> input_data_string = {
+      "[8, NaN, 4, 50, 52, 32, 11]", R"([null, "a", "a", "b", "b","b", "b"])",
+      "[11, NaN, 5, 51, null, 33, 12]", "[1, 3, 5, 10, null, 13, 2]"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_2 = {"[1, 14, NaN, 42, 6, null, 2]",
-                                                  R"(["a", "a", null, "b", "b", "a", "b"])",
-                                                  "[2, null, 44, 43, 7, 34, 3]",
-                                                  "[9, 7, 5, 1, 5, null, 17]"};
+  std::vector<std::string> input_data_string_2 = {
+      "[1, 14, NaN, 42, 6, null, 2]", R"(["a", "a", null, "b", "b", "a", "b"])",
+      "[2, null, 44, 43, 7, 34, 3]", "[9, 7, 5, 1, 5, null, 17]"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_3 = {"[3, 64, 8, 7, 9, 8, NaN]",
-                                                  R"(["a", "a", "b", "b", "b","b", "b"])",
-                                                  "[4, 65, 16, 8, 10, 20, 34]",
-                                                  "[8, 6, 2, 3, 10, 12, 15]"};
+  std::vector<std::string> input_data_string_3 = {
+      "[3, 64, 8, 7, 9, 8, NaN]", R"(["a", "a", "b", "b", "b","b", "b"])",
+      "[4, 65, 16, 8, 10, 20, 34]", "[8, 6, 2, 3, 10, 12, 15]"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_4 = {"[23, 17, 41, 18, 20, 35, 30]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[24, 18, 42, NaN, 21, 36, 31]",
-                                                  "[15, 16, 2, 51, null, 33, 12]"};
+  std::vector<std::string> input_data_string_4 = {
+      "[23, 17, 41, 18, 20, 35, 30]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[24, 18, 42, NaN, 21, 36, 31]", "[15, 16, 2, 51, null, 33, 12]"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_5 = {"[37, null, 22, 13, 8, 59, 21]",
-                                                  R"(["a", "b", "a", "b", "b","b", "b"])",
-                                                  "[38, 67, 23, 14, null, 60, 22]",
-                                                  "[16, 17, 5, 15, 9, null, 19]"};
+  std::vector<std::string> input_data_string_5 = {
+      "[37, null, 22, 13, 8, 59, 21]", R"(["a", "b", "a", "b", "b","b", "b"])",
+      "[38, 67, 23, 14, null, 60, 22]", "[16, 17, 5, 15, 9, null, 19]"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////// calculation //////////////////////////////
+  // The expected result batch is declared next.
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       "[1, 2, 3, 4, 6, 7, 8, 8, 8, 8, 9, 11, 13, 14, 17, 18, 20, 21, "
-      "22, 23, 30, 32, 35, 37, 41, 42, 50, 52, 59, 64, NaN, NaN, NaN, null, null]",
+      "22, 23, 30, 32, 35, 37, 41, 42, 50, 52, 59, 64, NaN, NaN, NaN, null, "
+      "null]",
       R"(["a","b","a","a","b","b", null,"b","b","b","b","b","b","a","a","b","b","b","a","a","b","b","b","a","a","b","b","b","b","a",null,"b","a","b","a"])",
-      "[2, 3, 4, 5, 7, 8, 11, null, 16, 20, 10, 12, 14, null, 18, NaN, 21, 22, 23, "
+      "[2, 3, 4, 5, 7, 8, 11, null, 16, 20, 10, 12, 14, null, 18, NaN, 21, 22, "
+      "23, "
       "24, 31, 33, 36, 38, 42, 43, 51, null, 60, 65, 44, 34, NaN, 67, 34]",
       "[9, 17, 8, 5, 5, 3, 1, 9, 2, 12, 10, 2, 15, 7, 16, 51, null, 19, 5, "
       "15, 12, 13, 33, 16, 2, 1, 10, null, null, 6, 5, 15, 3, 17, null]"};
@@ -1341,45 +1311,49 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithProjection) {
 
   auto isnotnull_0 = TreeExprBuilder::MakeFunction(
       "isnotnull", {TreeExprBuilder::MakeField(f0)}, arrow::boolean());
-  auto coalesce_0 = TreeExprBuilder::MakeIf(
-      isnotnull_0, TreeExprBuilder::MakeField(f0), uint32_node, uint32());
-  auto isnull_0 = TreeExprBuilder::MakeFunction(
-      "isnull", {arg_0}, arrow::boolean());
+  auto coalesce_0 = TreeExprBuilder::MakeIf(isnotnull_0, TreeExprBuilder::MakeField(f0),
+                                            uint32_node, uint32());
+  auto isnull_0 = TreeExprBuilder::MakeFunction("isnull", {arg_0}, arrow::boolean());
 
   auto isnotnull_1 = TreeExprBuilder::MakeFunction(
       "isnotnull", {TreeExprBuilder::MakeField(f1)}, arrow::boolean());
-  auto coalesce_1 = TreeExprBuilder::MakeIf(
-      isnotnull_1, TreeExprBuilder::MakeField(f1), str_node, utf8());
-  auto isnull_1 = TreeExprBuilder::MakeFunction(
-      "isnull", {arg_1}, arrow::boolean());
-  
+  auto coalesce_1 = TreeExprBuilder::MakeIf(isnotnull_1, TreeExprBuilder::MakeField(f1),
+                                            str_node, utf8());
+  auto isnull_1 = TreeExprBuilder::MakeFunction("isnull", {arg_1}, arrow::boolean());
+
   auto isnotnull_2 = TreeExprBuilder::MakeFunction(
       "isnotnull", {TreeExprBuilder::MakeField(f2)}, arrow::boolean());
-  auto coalesce_2 = TreeExprBuilder::MakeIf(
-      isnotnull_2, TreeExprBuilder::MakeField(f2), uint32_node, uint32());
-  auto isnull_2 = TreeExprBuilder::MakeFunction(
-      "isnull", {arg_2}, arrow::boolean());
+  auto coalesce_2 = TreeExprBuilder::MakeIf(isnotnull_2, TreeExprBuilder::MakeField(f2),
+                                            uint32_node, uint32());
+  auto isnull_2 = TreeExprBuilder::MakeFunction("isnull", {arg_2}, arrow::boolean());
 
   auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", 
-      {coalesce_0, isnull_0, coalesce_1, isnull_1, coalesce_2, isnull_2}, uint32());
+      "key_function", {coalesce_0, isnull_0, coalesce_1, isnull_1, coalesce_2, isnull_2},
+      uint32());
   auto n_key_field = TreeExprBuilder::MakeFunction(
       "key_field", {arg_0, arg_0, arg_1, arg_1, arg_2, arg_2}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal, true_literal, false_literal, false_literal, 
-                          true_literal, true_literal,}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal, false_literal, true_literal, true_literal, 
-                           true_literal, true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {true_literal}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions",
+                                             {
+                                                 true_literal,
+                                                 true_literal,
+                                                 false_literal,
+                                                 false_literal,
+                                                 true_literal,
+                                                 true_literal,
+                                             },
+                                             uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order",
+                                    {false_literal, false_literal, true_literal,
+                                     true_literal, true_literal, true_literal},
+                                    uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {true_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1, f2, f3});
@@ -1388,57 +1362,56 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithProjection) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
 
-  std::vector<std::string> input_data_string = {"[8, 8, 4, 50, 52, 32, 11]",
-                                                R"([null, "b", "a", "b", "b","b", "b"])",
-                                                "[11, 10, 5, 51, null, 33, 12]",
-                                                "[1, 3, 5, 10, null, 13, 2]"};
+  std::vector<std::string> input_data_string = {
+      "[8, 8, 4, 50, 52, 32, 11]", R"([null, "b", "a", "b", "b","b", "b"])",
+      "[11, 10, 5, 51, null, 33, 12]", "[1, 3, 5, 10, null, 13, 2]"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_2 = {"[1, 14, 8, 42, 6, null, 2]",
-                                                  R"(["a", "a", null, "b", "b","b", "b"])",
-                                                  "[2, null, 44, 43, 7, 34, 3]",
-                                                  "[9, 7, 5, 1, 5, null, 17]"};
+  std::vector<std::string> input_data_string_2 = {
+      "[1, 14, 8, 42, 6, null, 2]", R"(["a", "a", null, "b", "b","b", "b"])",
+      "[2, null, 44, 43, 7, 34, 3]", "[9, 7, 5, 1, 5, null, 17]"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_3 = {"[3, 64, 8, 7, 9, 8, 33]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[4, 65, 16, 8, 10, 20, 34]",
-                                                  "[8, 6, 2, 3, 10, 12, 15]"};
+  std::vector<std::string> input_data_string_3 = {
+      "[3, 64, 8, 7, 9, 8, 33]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[4, 65, 16, 8, 10, 20, 34]", "[8, 6, 2, 3, 10, 12, 15]"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_4 = {"[23, 17, 41, 18, 20, 35, 30]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[24, 18, 42, 19, 21, 36, 31]",
-                                                  "[15, 16, 2, 51, null, 33, 12]"};
+  std::vector<std::string> input_data_string_4 = {
+      "[23, 17, 41, 18, 20, 35, 30]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[24, 18, 42, 19, 21, 36, 31]", "[15, 16, 2, 51, null, 33, 12]"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_5 = {"[37, null, 22, 13, 8, 59, 21]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[38, 67, 23, 14, null, 60, 22]",
-                                                  "[16, 17, 5, 15, 9, null, 19]"};
+  std::vector<std::string> input_data_string_5 = {
+      "[37, null, 22, 13, 8, 59, 21]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[38, 67, 23, 14, null, 60, 22]", "[16, 17, 5, 15, 9, null, 19]"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////// calculation //////////////////////////////
+  // The expected result batch is declared next.
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[null, null, 1, 2, 3, 4, 6, 7, 8, 8, 8, 8, 8, 8, 9, 11, 13, 14, 17, 18, 20, 21, "
+      "[null, null, 1, 2, 3, 4, 6, 7, 8, 8, 8, 8, 8, 8, 9, 11, 13, 14, 17, 18, "
+      "20, 21, "
       "22, 23, 30, 32, 33, 35, 37, 41, 42, 50, 52, 59, 64]",
       R"(["b","a","a","b","a","a","b","b","b","b","b","a", null, null,"b","b","b","a","a","b","b","b","a","a","b","b","b","b","a","a","b","b","b","b","a"])",
-      "[34, 67, 2, 3, 4, 5, 7, 8, null, 10, 20, 16, 11, 44, 10, 12, 14, null, 18, 19, 21, 22, 23, "
+      "[34, 67, 2, 3, 4, 5, 7, 8, null, 10, 20, 16, 11, 44, 10, 12, 14, null, "
+      "18, 19, 21, 22, 23, "
       "24, 31, 33, 34, 36, 38, 42, 43, 51, null, 60, 65]",
-      "[null, 17, 9, 17, 8, 5, 5, 3, 9, 3, 12, 2, 1, 5, 10, 2, 15, 7, 16, 51, null, 19, 5, "
+      "[null, 17, 9, 17, 8, 5, 5, 3, 9, 3, 12, 2, 1, 5, 10, 2, 15, 7, 16, 51, "
+      "null, 19, 5, "
       "15, 12, 13, 15, 33, 16, 2, 1, 10, null, null, 6]"};
 
   MakeInputBatch(expected_result_string, ret_schema, &expected_result);
@@ -1476,23 +1449,21 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithoutCodegen) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0, arg_1, arg_2}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0, arg_1, arg_2}, uint32());
+  auto n_key_func =
+      TreeExprBuilder::MakeFunction("key_function", {arg_0, arg_1, arg_2}, uint32());
+  auto n_key_field =
+      TreeExprBuilder::MakeFunction("key_field", {arg_0, arg_1, arg_2}, uint32());
   auto n_dir = TreeExprBuilder::MakeFunction(
       "sort_directions", {true_literal, false_literal, true_literal}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
       "sort_nulls_order", {false_literal, true_literal, true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1, f2, f3});
@@ -1500,57 +1471,55 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithoutCodegen) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
 
-  std::vector<std::string> input_data_string = {"[8, 9, 4, 50, 52, 32, 11]",
-                                                R"([null, "a", "a", "b", "b","b", "b"])",
-                                                "[11, 3, 5, 51, null, 33, 12]",
-                                                "[1, 3, 5, 10, null, 13, 2]"};
+  std::vector<std::string> input_data_string = {
+      "[8, 9, 4, 50, 52, 32, 11]", R"([null, "a", "a", "b", "b","b", "b"])",
+      "[11, 3, 5, 51, null, 33, 12]", "[1, 3, 5, 10, null, 13, 2]"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_2 = {"[1, 14, 6, 42, 6, null, 2]",
-                                                  R"(["a", "a", null, "b", "b", "a", "b"])",
-                                                  "[2, null, 44, 43, 7, 34, 3]",
-                                                  "[9, 7, 5, 1, 5, null, 17]"};
+  std::vector<std::string> input_data_string_2 = {
+      "[1, 14, 6, 42, 6, null, 2]", R"(["a", "a", null, "b", "b", "a", "b"])",
+      "[2, null, 44, 43, 7, 34, 3]", "[9, 7, 5, 1, 5, null, 17]"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_3 = {"[3, 64, 8, 7, 9, 8, 12]",
-                                                  R"(["a", "a", "b", "b", "b","b", "b"])",
-                                                  "[4, 65, 16, 8, 10, 20, 34]",
-                                                  "[8, 6, 2, 3, 10, 12, 15]"};
+  std::vector<std::string> input_data_string_3 = {
+      "[3, 64, 8, 7, 9, 8, 12]", R"(["a", "a", "b", "b", "b","b", "b"])",
+      "[4, 65, 16, 8, 10, 20, 34]", "[8, 6, 2, 3, 10, 12, 15]"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_4 = {"[23, 17, 41, 18, 20, 35, 30]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[24, 18, 42, 15, 21, 36, 31]",
-                                                  "[15, 16, 2, 51, null, 33, 12]"};
+  std::vector<std::string> input_data_string_4 = {
+      "[23, 17, 41, 18, 20, 35, 30]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[24, 18, 42, 15, 21, 36, 31]", "[15, 16, 2, 51, null, 33, 12]"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_5 = {"[37, null, 22, 13, 8, 59, 21]",
-                                                  R"(["a", "b", "a", "b", "a","b", "b"])",
-                                                  "[38, 67, 23, 14, null, 60, 22]",
-                                                  "[16, 17, 5, 15, 9, null, 19]"};
-   MakeInputBatch(input_data_string_5, sch, &input_batch);
-   input_batch_list.push_back(input_batch);
+  std::vector<std::string> input_data_string_5 = {
+      "[37, null, 22, 13, 8, 59, 21]", R"(["a", "b", "a", "b", "a","b", "b"])",
+      "[38, 67, 23, 14, null, 60, 22]", "[16, 17, 5, 15, 9, null, 19]"};
+  MakeInputBatch(input_data_string_5, sch, &input_batch);
+  input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////// calculation //////////////////////////////
+  // The expected result batch is declared next.
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       "[1, 2, 3, 4, 6, 6, 7, 8, 8, 8, 8, 9, 9, 11, 12, 13, 14, 17, 18, 20, 21, "
       "22, 23, 30, 32, 35, 37, 41, 42, 50, 52, 59, 64, null, null]",
       R"(["a","b","a","a",null,"b","b",null,"b","b","a","b","a","b","b","b","a","a","b","b","b","a","a","b","b","b","a","a","b","b","b","b","a","b","a"])",
-      "[2, 3, 4, 5, 44, 7, 8, 11, 16, 20, null, 10, 3, 12, 34, 14, null, 18, 15, 21, 22, "
+      "[2, 3, 4, 5, 44, 7, 8, 11, 16, 20, null, 10, 3, 12, 34, 14, null, 18, "
+      "15, 21, 22, "
       "23, 24, 31, 33, 36, 38, 42, 43, 51, null, 60, 65, 67, 34]",
-      "[9, 17, 8, 5, 5, 5, 3, 1, 2, 12, 9, 10, 3, 2, 15, 15, 7, 16, 51, null, 19, 5, "
+      "[9, 17, 8, 5, 5, 5, 3, 1, 2, 12, 9, 10, 3, 2, 15, 15, 7, 16, 51, null, "
+      "19, 5, "
       "15, 12, 13, 33, 16, 2, 1, 10, null, null, 6, 17, null]"};
 
   MakeInputBatch(expected_result_string, sch, &expected_result);
@@ -1594,45 +1563,49 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithoutCodegenWithProjection) {
 
   auto isnotnull_0 = TreeExprBuilder::MakeFunction(
       "isnotnull", {TreeExprBuilder::MakeField(f0)}, arrow::boolean());
-  auto coalesce_0 = TreeExprBuilder::MakeIf(
-      isnotnull_0, TreeExprBuilder::MakeField(f0), uint32_node, uint32());
-  auto isnull_0 = TreeExprBuilder::MakeFunction(
-      "isnull", {arg_0}, arrow::boolean());
+  auto coalesce_0 = TreeExprBuilder::MakeIf(isnotnull_0, TreeExprBuilder::MakeField(f0),
+                                            uint32_node, uint32());
+  auto isnull_0 = TreeExprBuilder::MakeFunction("isnull", {arg_0}, arrow::boolean());
 
   auto isnotnull_1 = TreeExprBuilder::MakeFunction(
       "isnotnull", {TreeExprBuilder::MakeField(f1)}, arrow::boolean());
-  auto coalesce_1 = TreeExprBuilder::MakeIf(
-      isnotnull_1, TreeExprBuilder::MakeField(f1), str_node, utf8());
-  auto isnull_1 = TreeExprBuilder::MakeFunction(
-      "isnull", {arg_1}, arrow::boolean());
-  
+  auto coalesce_1 = TreeExprBuilder::MakeIf(isnotnull_1, TreeExprBuilder::MakeField(f1),
+                                            str_node, utf8());
+  auto isnull_1 = TreeExprBuilder::MakeFunction("isnull", {arg_1}, arrow::boolean());
+
   auto isnotnull_2 = TreeExprBuilder::MakeFunction(
       "isnotnull", {TreeExprBuilder::MakeField(f2)}, arrow::boolean());
-  auto coalesce_2 = TreeExprBuilder::MakeIf(
-      isnotnull_2, TreeExprBuilder::MakeField(f2), uint32_node, uint32());
-  auto isnull_2 = TreeExprBuilder::MakeFunction(
-      "isnull", {arg_2}, arrow::boolean());
+  auto coalesce_2 = TreeExprBuilder::MakeIf(isnotnull_2, TreeExprBuilder::MakeField(f2),
+                                            uint32_node, uint32());
+  auto isnull_2 = TreeExprBuilder::MakeFunction("isnull", {arg_2}, arrow::boolean());
 
   auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", 
-      {coalesce_0, isnull_0, coalesce_1, isnull_1, coalesce_2, isnull_2}, uint32());
+      "key_function", {coalesce_0, isnull_0, coalesce_1, isnull_1, coalesce_2, isnull_2},
+      uint32());
   auto n_key_field = TreeExprBuilder::MakeFunction(
       "key_field", {arg_0, arg_0, arg_1, arg_1, arg_2, arg_2}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal, true_literal, false_literal, false_literal, 
-                          true_literal, true_literal,}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal, false_literal, true_literal, true_literal, 
-                           true_literal, true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions",
+                                             {
+                                                 true_literal,
+                                                 true_literal,
+                                                 false_literal,
+                                                 false_literal,
+                                                 true_literal,
+                                                 true_literal,
+                                             },
+                                             uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order",
+                                    {false_literal, false_literal, true_literal,
+                                     true_literal, true_literal, true_literal},
+                                    uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1, f2, f3});
@@ -1641,57 +1614,56 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithoutCodegenWithProjection) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
 
-  std::vector<std::string> input_data_string = {"[8, 8, 4, 50, 52, 32, 11]",
-                                                R"([null, "b", "a", "b", "b","b", "b"])",
-                                                "[11, 10, 5, 51, null, 33, 12]",
-                                                "[1, 3, 5, 10, null, 13, 2]"};
+  std::vector<std::string> input_data_string = {
+      "[8, 8, 4, 50, 52, 32, 11]", R"([null, "b", "a", "b", "b","b", "b"])",
+      "[11, 10, 5, 51, null, 33, 12]", "[1, 3, 5, 10, null, 13, 2]"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_2 = {"[1, 14, 8, 42, 6, null, 2]",
-                                                  R"(["a", "a", null, "b", "b","b", "b"])",
-                                                  "[2, null, 44, 43, 7, 34, 3]",
-                                                  "[9, 7, 5, 1, 5, null, 17]"};
+  std::vector<std::string> input_data_string_2 = {
+      "[1, 14, 8, 42, 6, null, 2]", R"(["a", "a", null, "b", "b","b", "b"])",
+      "[2, null, 44, 43, 7, 34, 3]", "[9, 7, 5, 1, 5, null, 17]"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_3 = {"[3, 64, 8, 7, 9, 8, 33]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[4, 65, 16, 8, 10, 20, 34]",
-                                                  "[8, 6, 2, 3, 10, 12, 15]"};
+  std::vector<std::string> input_data_string_3 = {
+      "[3, 64, 8, 7, 9, 8, 33]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[4, 65, 16, 8, 10, 20, 34]", "[8, 6, 2, 3, 10, 12, 15]"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_4 = {"[23, 17, 41, 18, 20, 35, 30]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[24, 18, 42, 19, 21, 36, 31]",
-                                                  "[15, 16, 2, 51, null, 33, 12]"};
+  std::vector<std::string> input_data_string_4 = {
+      "[23, 17, 41, 18, 20, 35, 30]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[24, 18, 42, 19, 21, 36, 31]", "[15, 16, 2, 51, null, 33, 12]"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_5 = {"[37, null, 22, 13, 8, 59, 21]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[38, 67, 23, 14, null, 60, 22]",
-                                                  "[16, 17, 5, 15, 9, null, 19]"};
+  std::vector<std::string> input_data_string_5 = {
+      "[37, null, 22, 13, 8, 59, 21]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[38, 67, 23, 14, null, 60, 22]", "[16, 17, 5, 15, 9, null, 19]"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[null, null, 1, 2, 3, 4, 6, 7, 8, 8, 8, 8, 8, 8, 9, 11, 13, 14, 17, 18, 20, 21, "
+      "[null, null, 1, 2, 3, 4, 6, 7, 8, 8, 8, 8, 8, 8, 9, 11, 13, 14, 17, 18, "
+      "20, 21, "
       "22, 23, 30, 32, 33, 35, 37, 41, 42, 50, 52, 59, 64]",
       R"(["b","a","a","b","a","a","b","b","b","b","b","a", null, null,"b","b","b","a","a","b","b","b","a","a","b","b","b","b","a","a","b","b","b","b","a"])",
-      "[34, 67, 2, 3, 4, 5, 7, 8, null, 10, 20, 16, 11, 44, 10, 12, 14, null, 18, 19, 21, 22, 23, "
+      "[34, 67, 2, 3, 4, 5, 7, 8, null, 10, 20, 16, 11, 44, 10, 12, 14, null, "
+      "18, 19, 21, 22, 23, "
       "24, 31, 33, 34, 36, 38, 42, 43, 51, null, 60, 65]",
-      "[null, 17, 9, 17, 8, 5, 5, 3, 9, 3, 12, 2, 1, 5, 10, 2, 15, 7, 16, 51, null, 19, 5, "
+      "[null, 17, 9, 17, 8, 5, 5, 3, 9, 3, 12, 2, 1, 5, 10, 2, 15, 7, 16, 51, "
+      "null, 19, 5, "
       "15, 12, 13, 15, 33, 16, 2, 1, 10, null, null, 6]"};
 
   MakeInputBatch(expected_result_string, ret_schema, &expected_result);
@@ -1729,23 +1701,21 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysNaNWithoutCodegen) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0, arg_1, arg_2}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0, arg_1, arg_2}, uint32());
+  auto n_key_func =
+      TreeExprBuilder::MakeFunction("key_function", {arg_0, arg_1, arg_2}, uint32());
+  auto n_key_field =
+      TreeExprBuilder::MakeFunction("key_field", {arg_0, arg_1, arg_2}, uint32());
   auto n_dir = TreeExprBuilder::MakeFunction(
       "sort_directions", {true_literal, false_literal, true_literal}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
       "sort_nulls_order", {false_literal, true_literal, true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {true_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {true_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1, f2, f3});
@@ -1753,55 +1723,53 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysNaNWithoutCodegen) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
 
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
 
-  std::vector<std::string> input_data_string = {"[8, NaN, 4, 50, 52, 32, 11]",
-                                                R"([null, "a", "a", "b", "b","b", "b"])",
-                                                "[11, NaN, 5, 51, null, 33, 12]",
-                                                "[1, 3, 5, 10, null, 13, 2]"};
+  std::vector<std::string> input_data_string = {
+      "[8, NaN, 4, 50, 52, 32, 11]", R"([null, "a", "a", "b", "b","b", "b"])",
+      "[11, NaN, 5, 51, null, 33, 12]", "[1, 3, 5, 10, null, 13, 2]"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_2 = {"[1, 14, NaN, 42, 6, null, 2]",
-                                                  R"(["a", "a", null, "b", "b", "a", "b"])",
-                                                  "[2, null, 44, 43, 7, 34, 3]",
-                                                  "[9, 7, 5, 1, 5, null, 17]"};
+  std::vector<std::string> input_data_string_2 = {
+      "[1, 14, NaN, 42, 6, null, 2]", R"(["a", "a", null, "b", "b", "a", "b"])",
+      "[2, null, 44, 43, 7, 34, 3]", "[9, 7, 5, 1, 5, null, 17]"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_3 = {"[3, 64, 8, 7, 9, 8, NaN]",
-                                                  R"(["a", "a", "b", "b", "b","b", "b"])",
-                                                  "[4, 65, 16, 8, 10, 20, 34]",
-                                                  "[8, 6, 2, 3, 10, 12, 15]"};
+  std::vector<std::string> input_data_string_3 = {
+      "[3, 64, 8, 7, 9, 8, NaN]", R"(["a", "a", "b", "b", "b","b", "b"])",
+      "[4, 65, 16, 8, 10, 20, 34]", "[8, 6, 2, 3, 10, 12, 15]"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_4 = {"[23, 17, 41, 18, 20, 35, 30]",
-                                                  R"(["a", "a", "a", "b", "b","b", "b"])",
-                                                  "[24, 18, 42, NaN, 21, 36, 31]",
-                                                  "[15, 16, 2, 51, null, 33, 12]"};
+  std::vector<std::string> input_data_string_4 = {
+      "[23, 17, 41, 18, 20, 35, 30]", R"(["a", "a", "a", "b", "b","b", "b"])",
+      "[24, 18, 42, NaN, 21, 36, 31]", "[15, 16, 2, 51, null, 33, 12]"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  std::vector<std::string> input_data_string_5 = {"[37, null, 22, 13, 8, 59, 21]",
-                                                  R"(["a", "b", "a", "b", "b","b", "b"])",
-                                                  "[38, 67, 23, 14, null, 60, 22]",
-                                                  "[16, 17, 5, 15, 9, null, 19]"};
+  std::vector<std::string> input_data_string_5 = {
+      "[37, null, 22, 13, 8, 59, 21]", R"(["a", "b", "a", "b", "b","b", "b"])",
+      "[38, 67, 23, 14, null, 60, 22]", "[16, 17, 5, 15, 9, null, 19]"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
 
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       "[1, 2, 3, 4, 6, 7, 8, 8, 8, 8, 9, 11, 13, 14, 17, 18, 20, 21, "
-      "22, 23, 30, 32, 35, 37, 41, 42, 50, 52, 59, 64, NaN, NaN, NaN, null, null]",
+      "22, 23, 30, 32, 35, 37, 41, 42, 50, 52, 59, 64, NaN, NaN, NaN, null, "
+      "null]",
       R"(["a","b","a","a","b","b", null,"b","b","b","b","b","b","a","a","b","b","b","a","a","b","b","b","a","a","b","b","b","b","a",null,"b","a","b","a"])",
-      "[2, 3, 4, 5, 7, 8, 11, null, 16, 20, 10, 12, 14, null, 18, NaN, 21, 22, 23, "
+      "[2, 3, 4, 5, 7, 8, 11, null, 16, 20, 10, 12, 14, null, 18, NaN, 21, 22, "
+      "23, "
       "24, 31, 33, 36, 38, 42, 43, 51, null, 60, 65, 44, 34, NaN, 67, 34]",
       "[9, 17, 8, 5, 5, 3, 1, 9, 2, 12, 10, 2, 15, 7, 16, 51, null, 19, 5, "
       "15, 12, 13, 33, 16, 2, 1, 10, null, null, 6, 5, 15, 3, 17, null]"};
@@ -1839,23 +1807,18 @@ TEST(TestArrowComputeSort, SortTestOneKeyDecimal) {
   auto indices_type = std::make_shared<FixedSizeBinaryType>(16);
   auto f_indices = field("indices", indices_type);
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {false_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1});
@@ -1863,46 +1826,47 @@ TEST(TestArrowComputeSort, SortTestOneKeyDecimal) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
   std::shared_ptr<ResultIteratorBase> sort_result_iterator_base;
   std::vector<std::string> input_data_string = {
-    R"(["132311.7856", "1311.7556", null, "311.2656", null, "811.3656", "532311.7986"])",
-    R"(["132361.65356", "1211.12256", "3311.45256", "3191.96156", "211.16536", "341.36526", "5311.56736"])"};
+      R"(["132311.7856", "1311.7556", null, "311.2656", null, "811.3656", "532311.7986"])",
+      R"(["132361.65356", "1211.12256", "3311.45256", "3191.96156", "211.16536", "341.36526", "5311.56736"])"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_2 = {
-    R"(["832312.2656", "5511.7856", "324311.8956", "11.1666", "121.5657", "861.6656", "6311.1236"])",
-    R"(["6761.19356", null, "50311.53256", "2591.26156", "451.16536", "2341.66526", "1211.78626"])"};
+      R"(["832312.2656", "5511.7856", "324311.8956", "11.1666", "121.5657", "861.6656", "6311.1236"])",
+      R"(["6761.19356", null, "50311.53256", "2591.26156", "451.16536", "2341.66526", "1211.78626"])"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_3 = {
-    R"(["1573.5343", "1678.6556", null, "355.7626", null, "1911.8426", "453113.3556"])",
-    R"(["132361.44356", "1211.44256", "3311.44256", "3191.46156", "211.46536", "341.46526", "5311.44446"])"};
+      R"(["1573.5343", "1678.6556", null, "355.7626", null, "1911.8426", "453113.3556"])",
+      R"(["132361.44356", "1211.44256", "3311.44256", "3191.46156", "211.46536", "341.46526", "5311.44446"])"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_4 = {
-    R"(["5467.4224", "12345.6546", "435.2543", "643.0000", "643.0001", "42342.5642", "42663.2675"])",
-    R"(["2545326.54763", "2456.63765", "56734.43767", "2364457.23545", "57648.45773", "356.04500", "36.46522"])"};
+      R"(["5467.4224", "12345.6546", "435.2543", "643.0000", "643.0001", "42342.5642", "42663.2675"])",
+      R"(["2545326.54763", "2456.63765", "56734.43767", "2364457.23545", "57648.45773", "356.04500", "36.46522"])"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_5 = {
-    R"([null, "43556.3466", "245.2455", "6423.2562", "6342.0001", "75783.4757", "747487.2365"])",
-    R"(["3452321.54346", "6351.53632", "36546.54356", "87584.53763", "45753.54676", "23.56743", "2.54732"])"};
+      R"([null, "43556.3466", "245.2455", "6423.2562", "6342.0001", "75783.4757", "747487.2365"])",
+      R"(["3452321.54346", "6351.53632", "36546.54356", "87584.53763", "45753.54676", "23.56743", "2.54732"])"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-    R"(["11.1666", "121.5657", "245.2455", "311.2656", "355.7626", "435.2543", "643.0000", "643.0001", 
+      R"(["11.1666", "121.5657", "245.2455", "311.2656", "355.7626", "435.2543", "643.0000", "643.0001", 
         "811.3656", "861.6656", "1311.7556", "1573.5343", "1678.6556", "1911.8426", "5467.4224", "5511.7856", 
         "6311.1236", "6342.0001", "6423.2562", "12345.6546", "42342.5642", "42663.2675", "43556.3466", 
         "75783.4757", "132311.7856", "324311.8956", "453113.3556", "532311.7986", "747487.2365", "832312.2656", 
          null, null, null, null, null])",
-    R"(["2591.26156", "451.16536", "36546.54356", "3191.96156", "3191.46156", "56734.43767", "2364457.23545", 
+      R"(["2591.26156", "451.16536", "36546.54356", "3191.96156", "3191.46156", "56734.43767", "2364457.23545", 
         "57648.45773", "341.36526", "2341.66526", "1211.12256", "132361.44356", "1211.44256", "341.46526", 
         "2545326.54763", null, "1211.78626", "45753.54676", "87584.53763", "2456.63765", "356.04500", "36.46522", 
         "6351.53632", "23.56743", "132361.65356", "50311.53256", "5311.44446", "5311.56736", "2.54732", "6761.19356", 
@@ -1938,23 +1902,20 @@ TEST(TestArrowComputeSort, SortTestMulKeyDecimalCodegen) {
 
   auto f_res = field("res", uint32());
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0, arg_1}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0, arg_1}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal, false_literal}, uint32());
+  auto n_key_func =
+      TreeExprBuilder::MakeFunction("key_function", {arg_0, arg_1}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0, arg_1}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions",
+                                             {true_literal, false_literal}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
       "sort_nulls_order", {false_literal, true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {false_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {true_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {true_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1, f2, f3});
@@ -1962,66 +1923,67 @@ TEST(TestArrowComputeSort, SortTestMulKeyDecimalCodegen) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
   std::shared_ptr<ResultIteratorBase> sort_result_iterator_base;
   std::vector<std::string> input_data_string = {
-    R"(["132311.7856", "861.6656", null, "311.2656", null, "811.3656", "532311.7986"])",
-    R"(["132361.65356", "1211.12256", "3311.45256", "3191.96156", "211.16536", "341.36526", "5311.56736"])",
-    R"(["143451.436", "1415.345", "1345.636", "42651.345", "212351.162", "3241.421", "2351.235"])",
-    R"(["1244213.66", "23545.52", "5251.56", "2351.96", "3631.76", "52.52", "3456.23"])"};
+      R"(["132311.7856", "861.6656", null, "311.2656", null, "811.3656", "532311.7986"])",
+      R"(["132361.65356", "1211.12256", "3311.45256", "3191.96156", "211.16536", "341.36526", "5311.56736"])",
+      R"(["143451.436", "1415.345", "1345.636", "42651.345", "212351.162", "3241.421", "2351.235"])",
+      R"(["1244213.66", "23545.52", "5251.56", "2351.96", "3631.76", "52.52", "3456.23"])"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_2 = {
-    R"(["832312.2656", "5511.7856", "324311.8956", "11.1666", "121.5657", "861.6656", "6311.1236"])",
-    R"(["6761.19356", null, "50311.53256", "2591.26156", "451.16536", "2341.66526", "1211.78626"])",
-    R"(["67261.156", null, "32542.352", "3251.226", "124.252", "5647.290", "3252.679"])",
-    R"(["26.11", null, "325.98", "51.86", "451.56", "53.52", "151.56"])"};
+      R"(["832312.2656", "5511.7856", "324311.8956", "11.1666", "121.5657", "861.6656", "6311.1236"])",
+      R"(["6761.19356", null, "50311.53256", "2591.26156", "451.16536", "2341.66526", "1211.78626"])",
+      R"(["67261.156", null, "32542.352", "3251.226", "124.252", "5647.290", "3252.679"])",
+      R"(["26.11", null, "325.98", "51.86", "451.56", "53.52", "151.56"])"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_3 = {
-    R"(["861.6656", "861.6656", null, "355.7626", null, "1911.8426", "453113.3556"])",
-    R"(["132361.44356", null, null, "3191.46156", "211.46536", "341.46526", "5311.44446"])",
-    R"(["34521.562", "42421.522", "4622.561", "3466.145", "22251.432", "2652.543", "52662.424"])",
-    R"(["535.23", "4241.34", "452.60", "542.66", "241.66", "421.96", "41.26"])"};
+      R"(["861.6656", "861.6656", null, "355.7626", null, "1911.8426", "453113.3556"])",
+      R"(["132361.44356", null, null, "3191.46156", "211.46536", "341.46526", "5311.44446"])",
+      R"(["34521.562", "42421.522", "4622.561", "3466.145", "22251.432", "2652.543", "52662.424"])",
+      R"(["535.23", "4241.34", "452.60", "542.66", "241.66", "421.96", "41.26"])"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_4 = {
-    R"(["5467.4224", null, "435.2543", "643.0000", "643.0001", "42342.5642", "42663.2675"])",
-    R"(["2545326.54763", null, "56734.43767", "2364457.23545", "57648.45773", "356.04500", "36.46522"])",
-    R"(["4352.432", "241.321", "46536.432", "6875.452", "6432.412", "141.664", "41.465"])",
-    R"(["42521.52", "21453.63", "6342.41", "63213.46", "63451.86", "2521.76", "2441.23"])"};
+      R"(["5467.4224", null, "435.2543", "643.0000", "643.0001", "42342.5642", "42663.2675"])",
+      R"(["2545326.54763", null, "56734.43767", "2364457.23545", "57648.45773", "356.04500", "36.46522"])",
+      R"(["4352.432", "241.321", "46536.432", "6875.452", "6432.412", "141.664", "41.465"])",
+      R"(["42521.52", "21453.63", "6342.41", "63213.46", "63451.86", "2521.76", "2441.23"])"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_5 = {
-    R"([null, "43556.3466", "245.2455", "6423.2562", "6342.0001", "75783.4757", "747487.2365"])",
-    R"(["3452321.54346", "6351.53632", "36546.54356", "87584.53763", "45753.54676", "23.56743", "2.54732"])",
-    R"(["4531.563", "642.674", "3526.756", "6436.234", "634.675", "532.875", "632.865"])",
-    R"(["653.86", "524.98", "632.97", "865.98", "867.96", "7554.43", "24.80"])"};
+      R"([null, "43556.3466", "245.2455", "6423.2562", "6342.0001", "75783.4757", "747487.2365"])",
+      R"(["3452321.54346", "6351.53632", "36546.54356", "87584.53763", "45753.54676", "23.56743", "2.54732"])",
+      R"(["4531.563", "642.674", "3526.756", "6436.234", "634.675", "532.875", "632.865"])",
+      R"(["653.86", "524.98", "632.97", "865.98", "867.96", "7554.43", "24.80"])"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-    R"(["11.1666", "121.5657", "245.2455", "311.2656", "355.7626", "435.2543", "643.0000", "643.0001", 
+      R"(["11.1666", "121.5657", "245.2455", "311.2656", "355.7626", "435.2543", "643.0000", "643.0001", 
         "811.3656", "861.6656", "861.6656", "861.6656", "861.6656", "1911.8426", "5467.4224", 
         "5511.7856", "6311.1236", "6342.0001", "6423.2562", "42342.5642", "42663.2675", "43556.3466", 
         "75783.4757", "132311.7856", "324311.8956", "453113.3556", "532311.7986", "747487.2365", "832312.2656", 
          null, null, null, null, null, null])",
-    R"(["2591.26156", "451.16536", "36546.54356", "3191.96156", "3191.46156", "56734.43767", "2364457.23545", "57648.45773", 
+      R"(["2591.26156", "451.16536", "36546.54356", "3191.96156", "3191.46156", "56734.43767", "2364457.23545", "57648.45773", 
         "341.36526", null, "132361.44356", "2341.66526", "1211.12256", "341.46526", "2545326.54763", 
          null, "1211.78626", "45753.54676", "87584.53763", "356.04500", "36.46522", "6351.53632", 
          "23.56743", "132361.65356", "50311.53256", "5311.44446", "5311.56736", "2.54732", "6761.19356",
          null, null, "3452321.54346", "3311.45256", "211.46536", "211.16536"])",
-    R"(["3251.226", "124.252", "3526.756", "42651.345", "3466.145", "46536.432", "6875.452", "6432.412", 
+      R"(["3251.226", "124.252", "3526.756", "42651.345", "3466.145", "46536.432", "6875.452", "6432.412", 
         "3241.421", "42421.522", "34521.562", "5647.290", "1415.345", "2652.543", "4352.432", 
         null, "3252.679", "634.675", "6436.234", "141.664", "41.465", "642.674", 
         "532.875", "143451.436", "32542.352", "52662.424", "2351.235", "632.865", "67261.156", 
         "241.321", "4622.561", "4531.563", "1345.636", "22251.432", "212351.162"])",
-    R"(["51.86", "451.56", "632.97", "2351.96", "542.66", "6342.41", "63213.46", "63451.86", 
+      R"(["51.86", "451.56", "632.97", "2351.96", "542.66", "6342.41", "63213.46", "63451.86", 
         "52.52", "4241.34", "535.23", "53.52", "23545.52", "421.96", "42521.52", 
         null, "151.56", "867.96", "865.98", "2521.76", "2441.23", "524.98", 
         "7554.43", "1244213.66", "325.98", "41.26", "3456.23", "24.80", "26.11", 
@@ -2057,23 +2019,20 @@ TEST(TestArrowComputeSort, SortTestMulKeyDecimalWithoutCodegen) {
 
   auto f_res = field("res", uint32());
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0, arg_1}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0, arg_1}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal, false_literal}, uint32());
+  auto n_key_func =
+      TreeExprBuilder::MakeFunction("key_function", {arg_0, arg_1}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0, arg_1}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions",
+                                             {true_literal, false_literal}, uint32());
   auto n_nulls_order = TreeExprBuilder::MakeFunction(
       "sort_nulls_order", {false_literal, true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {false_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0, f1, f2, f3});
@@ -2081,66 +2040,67 @@ TEST(TestArrowComputeSort, SortTestMulKeyDecimalWithoutCodegen) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
   std::shared_ptr<ResultIteratorBase> sort_result_iterator_base;
   std::vector<std::string> input_data_string = {
-    R"(["132311.7856", "861.6656", null, "311.2656", null, "811.3656", "532311.7986"])",
-    R"(["132361.65356", "1211.12256", "3311.45256", "3191.96156", "211.16536", "341.36526", "5311.56736"])",
-    R"(["143451.436", "1415.345", "1345.636", "42651.345", "212351.162", "3241.421", "2351.235"])",
-    R"(["1244213.66", "23545.52", "5251.56", "2351.96", "3631.76", "52.52", "3456.23"])"};
+      R"(["132311.7856", "861.6656", null, "311.2656", null, "811.3656", "532311.7986"])",
+      R"(["132361.65356", "1211.12256", "3311.45256", "3191.96156", "211.16536", "341.36526", "5311.56736"])",
+      R"(["143451.436", "1415.345", "1345.636", "42651.345", "212351.162", "3241.421", "2351.235"])",
+      R"(["1244213.66", "23545.52", "5251.56", "2351.96", "3631.76", "52.52", "3456.23"])"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_2 = {
-    R"(["832312.2656", "5511.7856", "324311.8956", "11.1666", "121.5657", "861.6656", "6311.1236"])",
-    R"(["6761.19356", null, "50311.53256", "2591.26156", "451.16536", "2341.66526", "1211.78626"])",
-    R"(["67261.156", null, "32542.352", "3251.226", "124.252", "5647.290", "3252.679"])",
-    R"(["26.11", null, "325.98", "51.86", "451.56", "53.52", "151.56"])"};
+      R"(["832312.2656", "5511.7856", "324311.8956", "11.1666", "121.5657", "861.6656", "6311.1236"])",
+      R"(["6761.19356", null, "50311.53256", "2591.26156", "451.16536", "2341.66526", "1211.78626"])",
+      R"(["67261.156", null, "32542.352", "3251.226", "124.252", "5647.290", "3252.679"])",
+      R"(["26.11", null, "325.98", "51.86", "451.56", "53.52", "151.56"])"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_3 = {
-    R"(["861.6656", "861.6656", null, "355.7626", null, "1911.8426", "453113.3556"])",
-    R"(["132361.44356", null, null, "3191.46156", "211.46536", "341.46526", "5311.44446"])",
-    R"(["34521.562", "42421.522", "4622.561", "3466.145", "22251.432", "2652.543", "52662.424"])",
-    R"(["535.23", "4241.34", "452.60", "542.66", "241.66", "421.96", "41.26"])"};
+      R"(["861.6656", "861.6656", null, "355.7626", null, "1911.8426", "453113.3556"])",
+      R"(["132361.44356", null, null, "3191.46156", "211.46536", "341.46526", "5311.44446"])",
+      R"(["34521.562", "42421.522", "4622.561", "3466.145", "22251.432", "2652.543", "52662.424"])",
+      R"(["535.23", "4241.34", "452.60", "542.66", "241.66", "421.96", "41.26"])"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_4 = {
-    R"(["5467.4224", null, "435.2543", "643.0000", "643.0001", "42342.5642", "42663.2675"])",
-    R"(["2545326.54763", null, "56734.43767", "2364457.23545", "57648.45773", "356.04500", "36.46522"])",
-    R"(["4352.432", "241.321", "46536.432", "6875.452", "6432.412", "141.664", "41.465"])",
-    R"(["42521.52", "21453.63", "6342.41", "63213.46", "63451.86", "2521.76", "2441.23"])"};
+      R"(["5467.4224", null, "435.2543", "643.0000", "643.0001", "42342.5642", "42663.2675"])",
+      R"(["2545326.54763", null, "56734.43767", "2364457.23545", "57648.45773", "356.04500", "36.46522"])",
+      R"(["4352.432", "241.321", "46536.432", "6875.452", "6432.412", "141.664", "41.465"])",
+      R"(["42521.52", "21453.63", "6342.41", "63213.46", "63451.86", "2521.76", "2441.23"])"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_5 = {
-    R"([null, "43556.3466", "245.2455", "6423.2562", "6342.0001", "75783.4757", "747487.2365"])",
-    R"(["3452321.54346", "6351.53632", "36546.54356", "87584.53763", "45753.54676", "23.56743", "2.54732"])",
-    R"(["4531.563", "642.674", "3526.756", "6436.234", "634.675", "532.875", "632.865"])",
-    R"(["653.86", "524.98", "632.97", "865.98", "867.96", "7554.43", "24.80"])"};
+      R"([null, "43556.3466", "245.2455", "6423.2562", "6342.0001", "75783.4757", "747487.2365"])",
+      R"(["3452321.54346", "6351.53632", "36546.54356", "87584.53763", "45753.54676", "23.56743", "2.54732"])",
+      R"(["4531.563", "642.674", "3526.756", "6436.234", "634.675", "532.875", "632.865"])",
+      R"(["653.86", "524.98", "632.97", "865.98", "867.96", "7554.43", "24.80"])"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-    R"(["11.1666", "121.5657", "245.2455", "311.2656", "355.7626", "435.2543", "643.0000", "643.0001", 
+      R"(["11.1666", "121.5657", "245.2455", "311.2656", "355.7626", "435.2543", "643.0000", "643.0001", 
         "811.3656", "861.6656", "861.6656", "861.6656", "861.6656", "1911.8426", "5467.4224", 
         "5511.7856", "6311.1236", "6342.0001", "6423.2562", "42342.5642", "42663.2675", "43556.3466", 
         "75783.4757", "132311.7856", "324311.8956", "453113.3556", "532311.7986", "747487.2365", "832312.2656", 
          null, null, null, null, null, null])",
-    R"(["2591.26156", "451.16536", "36546.54356", "3191.96156", "3191.46156", "56734.43767", "2364457.23545", "57648.45773", 
+      R"(["2591.26156", "451.16536", "36546.54356", "3191.96156", "3191.46156", "56734.43767", "2364457.23545", "57648.45773", 
         "341.36526", null, "132361.44356", "2341.66526", "1211.12256", "341.46526", "2545326.54763", 
          null, "1211.78626", "45753.54676", "87584.53763", "356.04500", "36.46522", "6351.53632", 
          "23.56743", "132361.65356", "50311.53256", "5311.44446", "5311.56736", "2.54732", "6761.19356",
          null, null, "3452321.54346", "3311.45256", "211.46536", "211.16536"])",
-    R"(["3251.226", "124.252", "3526.756", "42651.345", "3466.145", "46536.432", "6875.452", "6432.412", 
+      R"(["3251.226", "124.252", "3526.756", "42651.345", "3466.145", "46536.432", "6875.452", "6432.412", 
         "3241.421", "42421.522", "34521.562", "5647.290", "1415.345", "2652.543", "4352.432", 
         null, "3252.679", "634.675", "6436.234", "141.664", "41.465", "642.674", 
         "532.875", "143451.436", "32542.352", "52662.424", "2351.235", "632.865", "67261.156", 
         "4622.561", "241.321", "4531.563", "1345.636", "22251.432", "212351.162"])",
-    R"(["51.86", "451.56", "632.97", "2351.96", "542.66", "6342.41", "63213.46", "63451.86", 
+      R"(["51.86", "451.56", "632.97", "2351.96", "542.66", "6342.41", "63213.46", "63451.86", 
         "52.52", "4241.34", "535.23", "53.52", "23545.52", "421.96", "42521.52", 
         null, "151.56", "867.96", "865.98", "2521.76", "2441.23", "524.98", 
         "7554.43", "1244213.66", "325.98", "41.26", "3456.23", "24.80", "26.11", 
@@ -2170,23 +2130,18 @@ TEST(TestArrowComputeSort, SortTestInplaceDecimal) {
 
   auto f_res = field("res", uint32());
 
-  auto n_key_func = TreeExprBuilder::MakeFunction(
-      "key_function", {arg_0}, uint32());
-  auto n_key_field = TreeExprBuilder::MakeFunction(
-      "key_field", {arg_0}, uint32());
-  auto n_dir = TreeExprBuilder::MakeFunction(
-      "sort_directions", {true_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
-      "sort_nulls_order", {false_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction(
-      "NaN_check", {false_literal}, uint32());
-  auto do_codegen = TreeExprBuilder::MakeFunction(
-      "codegen", {false_literal}, uint32());
+  auto n_key_func = TreeExprBuilder::MakeFunction("key_function", {arg_0}, uint32());
+  auto n_key_field = TreeExprBuilder::MakeFunction("key_field", {arg_0}, uint32());
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
+  auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {false_literal}, uint32());
+  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
+  auto do_codegen = TreeExprBuilder::MakeFunction("codegen", {false_literal}, uint32());
   auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices", 
+      "sortArraysToIndices",
       {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
-  auto n_sort = TreeExprBuilder::MakeFunction(
-      "standalone", {n_sort_to_indices}, uint32());
+  auto n_sort =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices}, uint32());
   auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
 
   auto sch = arrow::schema({f0});
@@ -2194,36 +2149,37 @@ TEST(TestArrowComputeSort, SortTestInplaceDecimal) {
   ///////////////////// Calculation //////////////////
   std::shared_ptr<CodeGenerator> sort_expr;
   arrow::compute::ExecContext ctx;
-  ASSERT_NOT_OK(CreateCodeGenerator(
-      ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
+  ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
+                                    &sort_expr, true));
   std::shared_ptr<arrow::RecordBatch> input_batch;
   std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
   std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
   std::shared_ptr<ResultIteratorBase> sort_result_iterator_base;
   std::vector<std::string> input_data_string = {
-    R"(["132311.7856", "1311.7556", null, "311.2656", null, "811.3656", "532311.7986"])"};
+      R"(["132311.7856", "1311.7556", null, "311.2656", null, "811.3656", "532311.7986"])"};
   MakeInputBatch(input_data_string, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_2 = {
-    R"(["832312.2656", "5511.7856", "324311.8956", "11.1666", "121.5657", "861.6656", "6311.1236"])"};
+      R"(["832312.2656", "5511.7856", "324311.8956", "11.1666", "121.5657", "861.6656", "6311.1236"])"};
   MakeInputBatch(input_data_string_2, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_3 = {
-    R"(["1573.5343", "1678.6556", null, "355.7626", null, "1911.8426", "453113.3556"])"};
+      R"(["1573.5343", "1678.6556", null, "355.7626", null, "1911.8426", "453113.3556"])"};
   MakeInputBatch(input_data_string_3, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_4 = {
-    R"(["5467.4224", "12345.6546", "435.2543", "643.0000", "643.0001", "42342.5642", "42663.2675"])"};
+      R"(["5467.4224", "12345.6546", "435.2543", "643.0000", "643.0001", "42342.5642", "42663.2675"])"};
   MakeInputBatch(input_data_string_4, sch, &input_batch);
   input_batch_list.push_back(input_batch);
   std::vector<std::string> input_data_string_5 = {
-    R"([null, "43556.3466", "245.2455", "6423.2562", "6342.0001", "75783.4757", "747487.2365"])"};
+      R"([null, "43556.3466", "245.2455", "6423.2562", "6342.0001", "75783.4757", "747487.2365"])"};
   MakeInputBatch(input_data_string_5, sch, &input_batch);
   input_batch_list.push_back(input_batch);
-  ////////////////////////////////// calculation ///////////////////////////////////
+  ////////////////////////////////// calculation
+  //////////////////////////////////////
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-    R"(["11.1666", "121.5657", "245.2455", "311.2656", "355.7626", "435.2543", "643.0000", "643.0001", 
+      R"(["11.1666", "121.5657", "245.2455", "311.2656", "355.7626", "435.2543", "643.0000", "643.0001", 
         "811.3656", "861.6656", "1311.7556", "1573.5343", "1678.6556", "1911.8426", "5467.4224", "5511.7856", 
         "6311.1236", "6342.0001", "6423.2562", "12345.6546", "42342.5642", "42663.2675", "43556.3466", 
         "75783.4757", "132311.7856", "324311.8956", "453113.3556", "532311.7986", "747487.2365", "832312.2656", 
diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_window.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_window.cc
index cd98793ab..63067aad4 100644
--- a/native-sql-engine/cpp/src/tests/arrow_compute_test_window.cc
+++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_window.cc
@@ -19,11 +19,11 @@
 #include <arrow/type.h>
 #include <gtest/gtest.h>
 
-#include "precompile/array.h"
-#include "tests/test_utils.h"
 #include "codegen/code_generator.h"
 #include "codegen/code_generator_factory.h"
+#include "precompile/array.h"
 #include "precompile/gandiva.h"
+#include "tests/test_utils.h"
 
 using arrow::int64;
 using arrow::uint32;
@@ -34,20 +34,26 @@ namespace codegen {
 
 TEST(TestArrowComputeWindow, DoubleTest) {
   std::shared_ptr<arrow::RecordBatch> input_batch;
-  auto sch = arrow::schema({field("col_int", arrow::int32()), field("col_dou", arrow::float64())});
-  std::vector<std::string> input_data = {
-      "[1, 2, 1]",
-      "[35.612, 37.244, 82.664]"};
+  auto sch = arrow::schema(
+      {field("col_int", arrow::int32()), field("col_dou", arrow::float64())});
+  std::vector<std::string> input_data = {"[1, 2, 1]", "[35.612, 37.244, 82.664]"};
   MakeInputBatch(input_data, sch, &input_batch);
 
   std::shared_ptr<Field> res = field("window_res", arrow::float64());
 
-  auto f_window = TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeFunction("window", {
-      TreeExprBuilder::MakeFunction("sum",
-          {TreeExprBuilder::MakeField(field("col_dou", arrow::float64()))}, null()),
-      TreeExprBuilder::MakeFunction("partitionSpec",
-          {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
-  }, binary()), res);
+  auto f_window = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeFunction(
+          "window",
+          {
+              TreeExprBuilder::MakeFunction(
+                  "sum", {TreeExprBuilder::MakeField(field("col_dou", arrow::float64()))},
+                  null()),
+              TreeExprBuilder::MakeFunction(
+                  "partitionSpec",
+                  {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
+          },
+          binary()),
+      res);
 
   arrow::compute::ExecContext ctx;
   std::shared_ptr<CodeGenerator> expr;
@@ -58,8 +64,7 @@ TEST(TestArrowComputeWindow, DoubleTest) {
   ASSERT_NOT_OK(expr->finish(&out))
 
   std::shared_ptr<arrow::RecordBatch> expected_result;
-  std::vector<std::string> expected_output_data = {
-      "[118.276, 37.244, 118.276]"};
+  std::vector<std::string> expected_output_data = {"[118.276, 37.244, 118.276]"};
 
   MakeInputBatch(expected_output_data, arrow::schema({res}), &expected_result);
   ASSERT_NOT_OK(Equals(*expected_result.get(), *(out.at(0).get())));
@@ -67,20 +72,26 @@ TEST(TestArrowComputeWindow, DoubleTest) {
 
 TEST(TestArrowComputeWindow, LongAvgTest) {
   std::shared_ptr<arrow::RecordBatch> input_batch;
-  auto sch = arrow::schema({field("col_int", arrow::int32()), field("col_long", arrow::int64())});
-  std::vector<std::string> input_data = {
-      "[1, 2, 1]",
-      "[35612, 37244, 82664]"};
+  auto sch = arrow::schema(
+      {field("col_int", arrow::int32()), field("col_long", arrow::int64())});
+  std::vector<std::string> input_data = {"[1, 2, 1]", "[35612, 37244, 82664]"};
   MakeInputBatch(input_data, sch, &input_batch);
 
   std::shared_ptr<Field> res = field("window_res", arrow::int64());
 
-  auto f_window = TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeFunction("window", {
-      TreeExprBuilder::MakeFunction("avg",
-                                    {TreeExprBuilder::MakeField(field("col_long", arrow::int64()))}, null()),
-      TreeExprBuilder::MakeFunction("partitionSpec",
-                                    {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
-  }, binary()), res);
+  auto f_window = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeFunction(
+          "window",
+          {
+              TreeExprBuilder::MakeFunction(
+                  "avg", {TreeExprBuilder::MakeField(field("col_long", arrow::int64()))},
+                  null()),
+              TreeExprBuilder::MakeFunction(
+                  "partitionSpec",
+                  {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
+          },
+          binary()),
+      res);
 
   arrow::compute::ExecContext ctx;
   std::shared_ptr<CodeGenerator> expr;
@@ -91,8 +102,7 @@ TEST(TestArrowComputeWindow, LongAvgTest) {
   ASSERT_NOT_OK(expr->finish(&out))
 
   std::shared_ptr<arrow::RecordBatch> expected_result;
-  std::vector<std::string> expected_output_data = {
-      "[59138, 37244, 59138]"};
+  std::vector<std::string> expected_output_data = {"[59138, 37244, 59138]"};
 
   MakeInputBatch(expected_output_data, arrow::schema({res}), &expected_result);
   ASSERT_NOT_OK(Equals(*expected_result.get(), *(out.at(0).get())));
@@ -100,20 +110,28 @@ TEST(TestArrowComputeWindow, LongAvgTest) {
 
 TEST(TestArrowComputeWindow, DecimalTest) {
   std::shared_ptr<arrow::RecordBatch> input_batch;
-  auto sch = arrow::schema({field("col_int", arrow::int32()), field("col_dec", arrow::decimal128(8, 3))});
-  std::vector<std::string> input_data = {
-      "[1, 2, 1]",
-      "[\"35.612\", \"37.244\", \"82.664\"]"};
+  auto sch = arrow::schema(
+      {field("col_int", arrow::int32()), field("col_dec", arrow::decimal128(8, 3))});
+  std::vector<std::string> input_data = {"[1, 2, 1]",
+                                         "[\"35.612\", \"37.244\", \"82.664\"]"};
   MakeInputBatch(input_data, sch, &input_batch);
 
   std::shared_ptr<Field> res = field("window_res", arrow::decimal128(8, 3));
 
-  auto f_window = TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeFunction("window", {
-      TreeExprBuilder::MakeFunction("sum",
-                                    {TreeExprBuilder::MakeField(field("col_dec", arrow::decimal128(8, 3)))}, null()),
-      TreeExprBuilder::MakeFunction("partitionSpec",
-                                    {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
-  }, binary()), res);
+  auto f_window = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeFunction(
+          "window",
+          {
+              TreeExprBuilder::MakeFunction(
+                  "sum",
+                  {TreeExprBuilder::MakeField(field("col_dec", arrow::decimal128(8, 3)))},
+                  null()),
+              TreeExprBuilder::MakeFunction(
+                  "partitionSpec",
+                  {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
+          },
+          binary()),
+      res);
 
   arrow::compute::ExecContext ctx;
   std::shared_ptr<CodeGenerator> expr;
@@ -132,22 +150,30 @@ TEST(TestArrowComputeWindow, DecimalTest) {
 }
 
 TEST(TestArrowComputeWindow, DecimalAvgTest) {
-  return; // fixme decimal avg not supported?
+  return;  // FIXME: decimal avg not supported? Early return skips the test body.
   std::shared_ptr<arrow::RecordBatch> input_batch;
-  auto sch = arrow::schema({field("col_int", arrow::int32()), field("col_dec", arrow::decimal128(8, 3))});
-  std::vector<std::string> input_data = {
-      "[1, 2, 1]",
-      "[\"35.612\", \"37.244\", \"82.664\"]"};
+  auto sch = arrow::schema(
+      {field("col_int", arrow::int32()), field("col_dec", arrow::decimal128(8, 3))});
+  std::vector<std::string> input_data = {"[1, 2, 1]",
+                                         "[\"35.612\", \"37.244\", \"82.664\"]"};
   MakeInputBatch(input_data, sch, &input_batch);
 
   std::shared_ptr<Field> res = field("window_res", arrow::decimal128(8, 3));
 
-  auto f_window = TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeFunction("window", {
-      TreeExprBuilder::MakeFunction("avg",
-                                    {TreeExprBuilder::MakeField(field("col_dec", arrow::decimal128(8, 3)))}, null()),
-      TreeExprBuilder::MakeFunction("partitionSpec",
-                                    {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
-  }, binary()), res);
+  auto f_window = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeFunction(
+          "window",
+          {
+              TreeExprBuilder::MakeFunction(
+                  "avg",
+                  {TreeExprBuilder::MakeField(field("col_dec", arrow::decimal128(8, 3)))},
+                  null()),
+              TreeExprBuilder::MakeFunction(
+                  "partitionSpec",
+                  {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
+          },
+          binary()),
+      res);
 
   arrow::compute::ExecContext ctx;
   std::shared_ptr<CodeGenerator> expr;
@@ -167,20 +193,28 @@ TEST(TestArrowComputeWindow, DecimalAvgTest) {
 
 TEST(TestArrowComputeWindow, DecimalRankTest) {
   std::shared_ptr<arrow::RecordBatch> input_batch;
-  auto sch = arrow::schema({field("col_int", arrow::int32()), field("col_dec", arrow::decimal128(8, 3))});
-  std::vector<std::string> input_data = {
-      "[1, 2, 1]",
-      "[\"35.612\", \"37.244\", \"35.613\"]"};
+  auto sch = arrow::schema(
+      {field("col_int", arrow::int32()), field("col_dec", arrow::decimal128(8, 3))});
+  std::vector<std::string> input_data = {"[1, 2, 1]",
+                                         "[\"35.612\", \"37.244\", \"35.613\"]"};
   MakeInputBatch(input_data, sch, &input_batch);
 
   std::shared_ptr<Field> res = field("window_res", arrow::int32());
 
-  auto f_window = TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeFunction("window", {
-      TreeExprBuilder::MakeFunction("rank_desc",
-                                    {TreeExprBuilder::MakeField(field("col_dec", arrow::decimal128(8, 3)))}, null()),
-      TreeExprBuilder::MakeFunction("partitionSpec",
-                                    {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
-  }, binary()), res);
+  auto f_window = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeFunction(
+          "window",
+          {
+              TreeExprBuilder::MakeFunction(
+                  "rank_desc",
+                  {TreeExprBuilder::MakeField(field("col_dec", arrow::decimal128(8, 3)))},
+                  null()),
+              TreeExprBuilder::MakeFunction(
+                  "partitionSpec",
+                  {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
+          },
+          binary()),
+      res);
 
   arrow::compute::ExecContext ctx;
   std::shared_ptr<CodeGenerator> expr;
@@ -191,8 +225,7 @@ TEST(TestArrowComputeWindow, DecimalRankTest) {
   ASSERT_NOT_OK(expr->finish(&out))
 
   std::shared_ptr<arrow::RecordBatch> expected_result;
-  std::vector<std::string> expected_output_data = {
-      "[2, 1, 1]"};
+  std::vector<std::string> expected_output_data = {"[2, 1, 1]"};
 
   MakeInputBatch(expected_output_data, arrow::schema({res}), &expected_result);
   ASSERT_NOT_OK(Equals(*expected_result.get(), *(out.at(0).get())));
@@ -200,20 +233,28 @@ TEST(TestArrowComputeWindow, DecimalRankTest) {
 
 TEST(TestArrowComputeWindow, DecimalRankTest2) {
   std::shared_ptr<arrow::RecordBatch> input_batch;
-  auto sch = arrow::schema({field("col_int", arrow::int32()), field("col_dec", arrow::decimal128(8, 3))});
-  std::vector<std::string> input_data = {
-      "[1, 2, 1]",
-      "[\"35.612\", \"37.244\", \"35.612\"]"};
+  auto sch = arrow::schema(
+      {field("col_int", arrow::int32()), field("col_dec", arrow::decimal128(8, 3))});
+  std::vector<std::string> input_data = {"[1, 2, 1]",
+                                         "[\"35.612\", \"37.244\", \"35.612\"]"};
   MakeInputBatch(input_data, sch, &input_batch);
 
   std::shared_ptr<Field> res = field("window_res", arrow::int32());
 
-  auto f_window = TreeExprBuilder::MakeExpression(TreeExprBuilder::MakeFunction("window", {
-      TreeExprBuilder::MakeFunction("rank_desc",
-                                    {TreeExprBuilder::MakeField(field("col_dec", arrow::decimal128(8, 3)))}, null()),
-      TreeExprBuilder::MakeFunction("partitionSpec",
-                                    {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
-  }, binary()), res);
+  auto f_window = TreeExprBuilder::MakeExpression(
+      TreeExprBuilder::MakeFunction(
+          "window",
+          {
+              TreeExprBuilder::MakeFunction(
+                  "rank_desc",
+                  {TreeExprBuilder::MakeField(field("col_dec", arrow::decimal128(8, 3)))},
+                  null()),
+              TreeExprBuilder::MakeFunction(
+                  "partitionSpec",
+                  {TreeExprBuilder::MakeField(field("col_int", arrow::int32()))}, null()),
+          },
+          binary()),
+      res);
 
   arrow::compute::ExecContext ctx;
   std::shared_ptr<CodeGenerator> expr;
@@ -224,8 +265,7 @@ TEST(TestArrowComputeWindow, DecimalRankTest2) {
   ASSERT_NOT_OK(expr->finish(&out))
 
   std::shared_ptr<arrow::RecordBatch> expected_result;
-  std::vector<std::string> expected_output_data = {
-      "[1, 1, 1]"};
+  std::vector<std::string> expected_output_data = {"[1, 1, 1]"};
 
   MakeInputBatch(expected_output_data, arrow::schema({res}), &expected_result);
   ASSERT_NOT_OK(Equals(*expected_result.get(), *(out.at(0).get())));
diff --git a/native-sql-engine/cpp/src/tests/arrow_compute_test_wscg.cc b/native-sql-engine/cpp/src/tests/arrow_compute_test_wscg.cc
index cb5d7afbc..0bdd5686a 100644
--- a/native-sql-engine/cpp/src/tests/arrow_compute_test_wscg.cc
+++ b/native-sql-engine/cpp/src/tests/arrow_compute_test_wscg.cc
@@ -1658,38 +1658,35 @@ TEST(TestArrowComputeWSCG, WSCGTestStringInnerMergeJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key_func = TreeExprBuilder::MakeFunction(
-      "upper", {TreeExprBuilder::MakeField(table1_f0)}, utf8());
-  auto n_right_key = TreeExprBuilder::MakeFunction("codegen_right_key_schema",
-                                                   {n_right_key_func}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f0),
-       TreeExprBuilder::MakeField(table1_f1)},
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key_func = TreeExprBuilder::MakeFunction( "upper",
+{TreeExprBuilder::MakeField(table1_f0)}, utf8()); auto n_right_key =
+TreeExprBuilder::MakeFunction("codegen_right_key_schema", {n_right_key_func},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
       uint32());
   auto n_condition = TreeExprBuilder::MakeFunction(
       "greater_than",
-      {TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f1)},
-      boolean());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinInner",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_condition}, uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      {TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f1)}, boolean()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinInner", {n_left, n_right,
+n_left_key, n_right_key, n_result, n_condition}, uint32()); auto n_child =
+TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -1699,15 +1696,16 @@ TEST(TestArrowComputeWSCG, WSCGTestStringInnerMergeJoin) {
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(
       ctx.memory_pool(), arrow::schema({}), {mergeJoin_expr},
-      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_join, true));
+      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_join,
+true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -1715,15 +1713,16 @@ TEST(TestArrowComputeWSCG, WSCGTestStringInnerMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -1731,15 +1730,15 @@ TEST(TestArrowComputeWSCG, WSCGTestStringInnerMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -1749,15 +1748,15 @@ TEST(TestArrowComputeWSCG, WSCGTestStringInnerMergeJoin) {
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_0;
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_1;
 
-  std::vector<std::string> input_data_string = {R"(["BJ", "SH", "HZ", "BH", "NY", "SH"])",
-                                                R"(["A", "A", "C", "D", "C", "D"])",
+  std::vector<std::string> input_data_string = {R"(["BJ", "SH", "HZ", "BH",
+"NY", "SH"])", R"(["A", "A", "C", "D", "C", "D"])",
                                                 "[10, 3, 1, 2, 13, 11]"};
   MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
   input_data_string = {R"(["TK", "SH", "PH", "NJ", "NB", "SZ"])",
-                       R"(["F", "F", "A", "B", "D", "C"])", "[6, 12, 5, 8, 16, 110]"};
-  MakeInputBatch(input_data_string, schema_table_0, &input_batch);
+                       R"(["F", "F", "A", "B", "D", "C"])", "[6, 12, 5, 8, 16,
+110]"}; MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {
@@ -1776,11 +1775,11 @@ TEST(TestArrowComputeWSCG, WSCGTestStringInnerMergeJoin) {
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
       R"(["BJ", "NJ", "NY", "SH", "SH", "SH", "SZ", "SZ"])",
-      R"(["A", "B", "C", "A", "D", "F", "C", "C"])", "[10, 8, 13, 3, 11, 12, 110, 110]",
-      R"(["bj", "nj", "ny", "sh", "sh", "sh", "sz", "sz"])", "[3, 5, 5, 1, 1, 1, 2, 12]"};
-  auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0, table1_f1});
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
-  expected_table.push_back(expected_result);
+      R"(["A", "B", "C", "A", "D", "F", "C", "C"])", "[10, 8, 13, 3, 11, 12,
+110, 110]", R"(["bj", "nj", "ny", "sh", "sh", "sh", "sz", "sz"])", "[3, 5, 5, 1,
+1, 1, 2, 12]"}; auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2,
+table1_f0, table1_f1}); MakeInputBatch(expected_result_string, res_sch,
+&expected_result); expected_table.push_back(expected_result);
 
   ////////////////////// evaluate //////////////////////
   for (auto batch : table_0) {
@@ -1822,34 +1821,32 @@ TEST(TestArrowComputeWSCG, WSCGTestInnerMergeJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
-      "codegen_right_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto f_res = field("res", uint32());
+      "codegen_right_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto f_res = field("res", uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f0)},
-      uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinInner", {n_left, n_right, n_left_key, n_right_key, n_result},
-      uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f0)}, uint32()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinInner", {n_left, n_right,
+n_left_key, n_right_key, n_result}, uint32()); auto n_child =
+TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0});
-  auto schema_table = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0});
-  std::shared_ptr<CodeGenerator> expr_join;
+  auto schema_table = arrow::schema({table0_f0, table0_f1, table0_f2,
+table1_f0}); std::shared_ptr<CodeGenerator> expr_join;
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(
       ctx.memory_pool(), arrow::schema({}), {mergeJoin_expr},
@@ -1857,11 +1854,11 @@ TEST(TestArrowComputeWSCG, WSCGTestInnerMergeJoin) {
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -1869,15 +1866,16 @@ TEST(TestArrowComputeWSCG, WSCGTestInnerMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -1885,11 +1883,11 @@ TEST(TestArrowComputeWSCG, WSCGTestInnerMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
                                     {sortArrays_expr_right}, {table1_f0},
@@ -1904,18 +1902,20 @@ TEST(TestArrowComputeWSCG, WSCGTestInnerMergeJoin) {
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_1;
 
   std::vector<std::string> input_data_string = {R"([12, 27, 34, 10, 39, 27])",
-                                                R"(["A", "A", "C", "D", "C", "D"])",
+                                                R"(["A", "A", "C", "D", "C",
+"D"])",
                                                 "[10, 3, 1, 2, 13, 11]"};
   MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
-  input_data_string = {R"([31, 27, 24, 24, 16, 45])", R"(["F", "F", "A", "B", "D", "C"])",
+  input_data_string = {R"([31, 27, 24, 24, 16, 45])", R"(["F", "F", "A", "B",
+"D", "C"])",
                        "[6, 12, 5, 8, 16, 110]"};
   MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
-  std::vector<std::string> input_data_2_string = {R"([27, 45, 12, null, 39, 34])"};
-  MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
+  std::vector<std::string> input_data_2_string = {R"([27, 45, 12, null, 39,
+34])"}; MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
   table_1.push_back(input_batch);
 
   input_data_2_string = {R"([24, null, 18, 22, 24, 45])"};
@@ -1929,8 +1929,9 @@ TEST(TestArrowComputeWSCG, WSCGTestInnerMergeJoin) {
   std::vector<std::string> expected_result_string = {
       R"([12,24,24,24,24,27,27,27,34,39,45,45])",
       R"(["A","A","B","A","B","A","D","F","C","C","C","C"])",
-      "[10,5,8,5,8,3,11,12,1,13,110,110]", R"([12,24,24,24,24,27,27,27,34,39,45,45])"};
-  auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0});
+      "[10,5,8,5,8,3,11,12,1,13,110,110]",
+R"([12,24,24,24,24,27,27,27,34,39,45,45])"}; auto res_sch =
+arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0});
   MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
@@ -1975,38 +1976,35 @@ TEST(TestArrowComputeWSCG, WSCGTestStringOuterMergeJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key_func = TreeExprBuilder::MakeFunction(
-      "upper", {TreeExprBuilder::MakeField(table1_f0)}, utf8());
-  auto n_right_key = TreeExprBuilder::MakeFunction("codegen_right_key_schema",
-                                                   {n_right_key_func}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f0),
-       TreeExprBuilder::MakeField(table1_f1)},
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key_func = TreeExprBuilder::MakeFunction( "upper",
+{TreeExprBuilder::MakeField(table1_f0)}, utf8()); auto n_right_key =
+TreeExprBuilder::MakeFunction("codegen_right_key_schema", {n_right_key_func},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
       uint32());
   auto n_condition = TreeExprBuilder::MakeFunction(
       "greater_than",
-      {TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f1)},
-      boolean());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinOuter",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_condition}, uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      {TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f1)}, boolean()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinOuter", {n_left, n_right,
+n_left_key, n_right_key, n_result, n_condition}, uint32()); auto n_child =
+TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -2016,15 +2014,16 @@ TEST(TestArrowComputeWSCG, WSCGTestStringOuterMergeJoin) {
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(
       ctx.memory_pool(), arrow::schema({}), {mergeJoin_expr},
-      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_join, true));
+      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_join,
+true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -2032,15 +2031,16 @@ TEST(TestArrowComputeWSCG, WSCGTestStringOuterMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -2048,15 +2048,15 @@ TEST(TestArrowComputeWSCG, WSCGTestStringOuterMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -2066,15 +2066,15 @@ TEST(TestArrowComputeWSCG, WSCGTestStringOuterMergeJoin) {
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_0;
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_1;
 
-  std::vector<std::string> input_data_string = {R"(["BJ", "SH", "HZ", "BH", "NY", "SH"])",
-                                                R"(["A", "A", "C", "D", "C", "D"])",
+  std::vector<std::string> input_data_string = {R"(["BJ", "SH", "HZ", "BH",
+"NY", "SH"])", R"(["A", "A", "C", "D", "C", "D"])",
                                                 "[10, 3, 1, 2, 13, 11]"};
   MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
   input_data_string = {R"(["TK", "SH", "PH", "NJ", "NB", "SZ"])",
-                       R"(["F", "F", "A", "B", "D", "C"])", "[6, 12, 5, 8, 16, 110]"};
-  MakeInputBatch(input_data_string, schema_table_0, &input_batch);
+                       R"(["F", "F", "A", "B", "D", "C"])", "[6, 12, 5, 8, 16,
+110]"}; MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {
@@ -2092,15 +2092,15 @@ TEST(TestArrowComputeWSCG, WSCGTestStringOuterMergeJoin) {
   std::vector<std::shared_ptr<RecordBatch>> expected_table;
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      R"([null, null, "BJ", null, null, null, "NJ", "NY", null, "SH", "SH", "SH", "SZ",
-"SZ"])", R"([null, null, "A", null, null, null, "B", "C", null, "A", "D", "F", "C",
-"C"])",
+      R"([null, null, "BJ", null, null, null, "NJ", "NY", null, "SH", "SH",
+"SH", "SZ", "SZ"])", R"([null, null, "A", null, null, null, "B", "C", null, "A",
+"D", "F", "C", "C"])",
       "[null, null, 10, null, null, null, 8, 13, null, 3, 11, 12, 110, 110]",
-      R"([null, null, "bj", "hz", "jh", "kk", "nj", "ny", "ph", "sh", "sh", "sh", "sz",
-"sz"])",
+      R"([null, null, "bj", "hz", "jh", "kk", "nj", "ny", "ph", "sh", "sh",
+"sh", "sz", "sz"])",
       "[4, 8, 3, 6, 9, 10, 5, 5, 7, 1, 1, 1, 2, 12]"};
-  auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0, table1_f1});
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+  auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0,
+table1_f1}); MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
   ////////////////////// evaluate //////////////////////
@@ -2144,30 +2144,28 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinAnti", {n_left, n_right, n_left_key, n_right_key, n_result},
-      uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinAnti", {n_left, n_right,
+n_left_key, n_right_key, n_result}, uint32()); auto n_child =
+TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -2176,16 +2174,16 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoin) {
   std::shared_ptr<CodeGenerator> expr_join;
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), arrow::schema({}),
-                                    {mergeJoin_expr}, {table1_f0, table1_f1}, &expr_join,
-                                    true));
+                                    {mergeJoin_expr}, {table1_f0, table1_f1},
+&expr_join, true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -2193,15 +2191,16 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -2209,15 +2208,15 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -2251,8 +2250,8 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoin) {
   auto res_sch = arrow::schema({table1_f0, table1_f1});
   std::vector<std::shared_ptr<RecordBatch>> expected_table;
   std::shared_ptr<arrow::RecordBatch> expected_result;
-  std::vector<std::string> expected_result_string = {"[4, 7, 9, 11]", "[4, 7, 9, 11]"};
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+  std::vector<std::string> expected_result_string = {"[4, 7, 9, 11]", "[4, 7, 9,
+11]"}; MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
   ////////////////////// evaluate //////////////////////
@@ -2296,34 +2295,31 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoinWithCondition) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_condition = TreeExprBuilder::MakeFunction(
-      "greater_than",
-      {TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table1_f1)},
-      arrow::boolean());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinAnti",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_condition}, uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_condition =
+TreeExprBuilder::MakeFunction( "greater_than",
+      {TreeExprBuilder::MakeField(table0_f1),
+TreeExprBuilder::MakeField(table1_f1)}, arrow::boolean()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinAnti", {n_left, n_right,
+n_left_key, n_right_key, n_result, n_condition}, uint32()); auto n_child =
+TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -2332,16 +2328,16 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoinWithCondition) {
   std::shared_ptr<CodeGenerator> expr_join;
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), arrow::schema({}),
-                                    {mergeJoin_expr}, {table1_f0, table1_f1}, &expr_join,
-                                    true));
+                                    {mergeJoin_expr}, {table1_f0, table1_f1},
+&expr_join, true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -2349,15 +2345,16 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoinWithCondition) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -2365,15 +2362,15 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoinWithCondition) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -2407,9 +2404,10 @@ TEST(TestArrowComputeWSCG, WSCGTestAntiMergeJoinWithCondition) {
   auto res_sch = arrow::schema({table1_f0, table1_f1});
   std::vector<std::shared_ptr<RecordBatch>> expected_table;
   std::shared_ptr<arrow::RecordBatch> expected_result;
-  std::vector<std::string> expected_result_string = {"[2, 4, 5, 7, 8, 9, 11, 12]",
-                                                     "[2, 4, 5, 7, 8, 9, 11, 12]"};
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+  std::vector<std::string> expected_result_string = {"[2, 4, 5, 7, 8, 9, 11,
+12]",
+                                                     "[2, 4, 5, 7, 8, 9, 11,
+12]"}; MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
   ////////////////////// evaluate //////////////////////
@@ -2453,30 +2451,28 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinSemi", {n_left, n_right, n_left_key, n_right_key, n_result},
-      uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinSemi", {n_left, n_right,
+n_left_key, n_right_key, n_result}, uint32()); auto n_child =
+TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -2485,16 +2481,16 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoin) {
   std::shared_ptr<CodeGenerator> expr_join;
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), arrow::schema({}),
-                                    {mergeJoin_expr}, {table1_f0, table1_f1}, &expr_join,
-                                    true));
+                                    {mergeJoin_expr}, {table1_f0, table1_f1},
+&expr_join, true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -2502,15 +2498,16 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -2518,15 +2515,15 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -2547,8 +2544,8 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoin) {
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {"[1, 3, 4, 5, 6]",
-                                                  R"(["BJ", "TY", "NY", "SH", "HZ"])"};
-  MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
+                                                  R"(["BJ", "TY", "NY", "SH",
+"HZ"])"}; MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
   table_1.push_back(input_batch);
 
   input_data_2_string = {"[7, 8, 9, 10, 11, 12]",
@@ -2562,8 +2559,8 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoin) {
   std::vector<std::shared_ptr<RecordBatch>> expected_table;
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      "[1, 3, 5, 6, 8, 10, 12]", R"(["BJ", "TY", "SH", "HZ", "NY", "IT", "TL"])"};
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+      "[1, 3, 5, 6, 8, 10, 12]", R"(["BJ", "TY", "SH", "HZ", "NY", "IT",
+"TL"])"}; MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
   ////////////////////// evaluate //////////////////////
@@ -2607,34 +2604,31 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoinWithCondition) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_condition = TreeExprBuilder::MakeFunction(
-      "greater_than",
-      {TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
-      arrow::boolean());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinSemi",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_condition}, uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_condition =
+TreeExprBuilder::MakeFunction( "greater_than",
+      {TreeExprBuilder::MakeField(table0_f1),
+TreeExprBuilder::MakeField(table0_f2)}, arrow::boolean()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinSemi", {n_left, n_right,
+n_left_key, n_right_key, n_result, n_condition}, uint32()); auto n_child =
+TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -2643,16 +2637,16 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoinWithCondition) {
   std::shared_ptr<CodeGenerator> expr_join;
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), arrow::schema({}),
-                                    {mergeJoin_expr}, {table1_f0, table1_f1}, &expr_join,
-                                    true));
+                                    {mergeJoin_expr}, {table1_f0, table1_f1},
+&expr_join, true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -2660,15 +2654,16 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoinWithCondition) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -2676,15 +2671,15 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoinWithCondition) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -2705,8 +2700,8 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoinWithCondition) {
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {"[1, 3, 4, 5, 6]",
-                                                  R"(["BJ", "TY", "NY", "SH", "HZ"])"};
-  MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
+                                                  R"(["BJ", "TY", "NY", "SH",
+"HZ"])"}; MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
   table_1.push_back(input_batch);
 
   input_data_2_string = {"[7, 8, 9, 10, 11, 12]",
@@ -2720,8 +2715,8 @@ TEST(TestArrowComputeWSCG, WSCGTestSemiMergeJoinWithCondition) {
   std::vector<std::shared_ptr<RecordBatch>> expected_table;
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {"[1, 5, 6, 8, 10]",
-                                                     R"(["BJ", "SH", "HZ", "NY", "IT"])"};
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+                                                     R"(["BJ", "SH", "HZ", "NY",
+"IT"])"}; MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
   ////////////////////// evaluate //////////////////////
@@ -2766,31 +2761,31 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1),
-       TreeExprBuilder::MakeField(f_exist)},
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1), TreeExprBuilder::MakeField(f_exist)},
       uint32());
   auto n_probeArrays = TreeExprBuilder::MakeFunction(
       "conditionedMergeJoinExistence",
       {n_left, n_right, n_left_key, n_right_key, n_result}, uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays},
+uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -2799,16 +2794,16 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoin) {
   std::shared_ptr<CodeGenerator> expr_join;
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), arrow::schema({}),
-                                    {mergeJoin_expr}, {table1_f0, table1_f1, f_exist},
-                                    &expr_join, true));
+                                    {mergeJoin_expr}, {table1_f0, table1_f1,
+f_exist}, &expr_join, true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -2816,15 +2811,16 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -2832,15 +2828,15 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoin) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -2861,8 +2857,8 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoin) {
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {"[1, 3, 4, 5, 6]",
-                                                  R"(["BJ", "TY", "NY", "SH", "HZ"])"};
-  MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
+                                                  R"(["BJ", "TY", "NY", "SH",
+"HZ"])"}; MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
   table_1.push_back(input_batch);
 
   input_data_2_string = {"[7, 8, 9, 10, 11, 12]",
@@ -2924,35 +2920,33 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoinWithCondition) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1),
-       TreeExprBuilder::MakeField(f_exist)},
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1), TreeExprBuilder::MakeField(f_exist)},
       uint32());
   auto n_condition = TreeExprBuilder::MakeFunction(
       "greater_than",
-      {TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
-      arrow::boolean());
-  auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinExistence",
-      {n_left, n_right, n_left_key, n_right_key, n_result, n_condition}, uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      {TreeExprBuilder::MakeField(table0_f1),
+TreeExprBuilder::MakeField(table0_f2)}, arrow::boolean()); auto n_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinExistence", {n_left,
+n_right, n_left_key, n_right_key, n_result, n_condition}, uint32()); auto
+n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -2961,16 +2955,16 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoinWithCondition) {
   std::shared_ptr<CodeGenerator> expr_join;
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), arrow::schema({}),
-                                    {mergeJoin_expr}, {table1_f0, table1_f1, f_exist},
-                                    &expr_join, true));
+                                    {mergeJoin_expr}, {table1_f0, table1_f1,
+f_exist}, &expr_join, true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -2978,15 +2972,16 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoinWithCondition) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -2994,15 +2989,15 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoinWithCondition) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -3023,8 +3018,8 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoinWithCondition) {
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {"[1, 3, 4, 5, 6]",
-                                                  R"(["BJ", "TY", "NY", "SH", "HZ"])"};
-  MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
+                                                  R"(["BJ", "TY", "NY", "SH",
+"HZ"])"}; MakeInputBatch(input_data_2_string, schema_table_1, &input_batch);
   table_1.push_back(input_batch);
 
   input_data_2_string = {"[7, 8, 9, 10, 11, 12]",
@@ -3040,8 +3035,8 @@ TEST(TestArrowComputeWSCG, WSCGTestExistenceMergeJoinWithCondition) {
   std::vector<std::string> expected_result_string = {
       "[1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]",
       R"(["BJ", "TY", "NY", "SH", "HZ", "SH", "NY", "BJ", "IT", "BR", "TL"])",
-      "[true, false, false, true, true, false, true, false, true, false, false]"};
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+      "[true, false, false, true, true, false, true, false, true, false,
+false]"}; MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
   ////////////////////// evaluate //////////////////////
@@ -3085,37 +3080,36 @@ TEST(TestArrowComputeWSCG, WSCGTestTwoKeysOuterMergeJoin) {
   ///////////////////////////////////////////
   auto n_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto f_res = field("res", uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto f_res = field("res",
+uint32());
 
   auto n_left_key = TreeExprBuilder::MakeFunction(
       "codegen_left_key_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f2)},
-      uint32());
-  auto n_right_key_func = TreeExprBuilder::MakeFunction(
-      "upper", {TreeExprBuilder::MakeField(table1_f0)}, utf8());
-  auto n_right_key = TreeExprBuilder::MakeFunction(
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f2)}, uint32()); auto n_right_key_func =
+TreeExprBuilder::MakeFunction( "upper", {TreeExprBuilder::MakeField(table1_f0)},
+utf8()); auto n_right_key = TreeExprBuilder::MakeFunction(
       "codegen_right_key_schema",
       {n_right_key_func, TreeExprBuilder::MakeField(table1_f1)}, uint32());
   auto n_result = TreeExprBuilder::MakeFunction(
       "result",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2), TreeExprBuilder::MakeField(table1_f0),
-       TreeExprBuilder::MakeField(table1_f1)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2),
+TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
       uint32());
   auto n_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinOuter", {n_left, n_right, n_left_key, n_right_key, n_result},
-      uint32());
-  auto n_child = TreeExprBuilder::MakeFunction("child", {n_probeArrays}, uint32());
+      "conditionedMergeJoinOuter", {n_left, n_right, n_left_key, n_right_key,
+n_result}, uint32()); auto n_child = TreeExprBuilder::MakeFunction("child",
+{n_probeArrays}, uint32());
   //////////////////////////////////////////////////////////////////
-  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+  auto n_wscg = TreeExprBuilder::MakeFunction("wholestagecodegen", {n_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -3125,13 +3119,14 @@ TEST(TestArrowComputeWSCG, WSCGTestTwoKeysOuterMergeJoin) {
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(
       ctx.memory_pool(), arrow::schema({}), {mergeJoin_expr},
-      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_join, true));
+      {table0_f0, table0_f1, table0_f2, table1_f0, table1_f1}, &expr_join,
+true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
   auto n_dir = TreeExprBuilder::MakeFunction("sort_directions",
-                                             {true_literal, true_literal}, uint32());
-  auto n_nulls_order = TreeExprBuilder::MakeFunction(
+                                             {true_literal, true_literal},
+uint32()); auto n_nulls_order = TreeExprBuilder::MakeFunction(
       "sort_nulls_order", {true_literal, true_literal}, uint32());
   auto NaN_check = TreeExprBuilder::MakeFunction(
       "NaN_check", {false_literal, false_literal}, uint32());
@@ -3139,43 +3134,39 @@ TEST(TestArrowComputeWSCG, WSCGTestTwoKeysOuterMergeJoin) {
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f2)},
-      uint32());
-  auto n_key_field_left = TreeExprBuilder::MakeFunction(
-      "key_field",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f2)},
-      uint32());
-  auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f2)}, uint32()); auto n_key_field_left =
+TreeExprBuilder::MakeFunction( "key_field",
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f2)}, uint32()); auto n_sort_to_indices_left =
+TreeExprBuilder::MakeFunction( "sortArraysToIndices", {n_key_func_left,
+n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type}, uint32()); auto
+n_sort_left = TreeExprBuilder::MakeFunction("standalone",
+{n_sort_to_indices_left}, uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_key_field_right = TreeExprBuilder::MakeFunction(
-      "key_field",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
-      "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_key_field_right =
+TreeExprBuilder::MakeFunction( "key_field",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_sort_to_indices_right
+= TreeExprBuilder::MakeFunction( "sortArraysToIndices", {n_key_func_right,
+n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type}, uint32());
   auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
 
   ///////////////////// Calculation //////////////////
   std::shared_ptr<arrow::RecordBatch> input_batch;
@@ -3185,15 +3176,15 @@ TEST(TestArrowComputeWSCG, WSCGTestTwoKeysOuterMergeJoin) {
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_0;
   std::vector<std::shared_ptr<arrow::RecordBatch>> table_1;
 
-  std::vector<std::string> input_data_string = {R"(["BJ", "SH", "HZ", "BH", "NY", "SH"])",
-                                                R"(["A", "A", "C", "D", "C", "D"])",
+  std::vector<std::string> input_data_string = {R"(["BJ", "SH", "HZ", "BH",
+"NY", "SH"])", R"(["A", "A", "C", "D", "C", "D"])",
                                                 "[10, 13, 1, 2, 13, 11]"};
   MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
   input_data_string = {R"(["TK", "SH", "PH", "NJ", "NB", "SZ"])",
-                       R"(["F", "F", "A", "B", "D", "C"])", "[6, 11, 5, 8, 16, 12]"};
-  MakeInputBatch(input_data_string, schema_table_0, &input_batch);
+                       R"(["F", "F", "A", "B", "D", "C"])", "[6, 11, 5, 8, 16,
+12]"}; MakeInputBatch(input_data_string, schema_table_0, &input_batch);
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_2_string = {
@@ -3211,13 +3202,15 @@ TEST(TestArrowComputeWSCG, WSCGTestTwoKeysOuterMergeJoin) {
   std::vector<std::shared_ptr<RecordBatch>> expected_table;
   std::shared_ptr<arrow::RecordBatch> expected_result;
   std::vector<std::string> expected_result_string = {
-      R"([null, null, "BJ", null, null, null, "NJ", "NY", null, "SH", "SH", null, "SZ"])",
-      R"([null, null, "A", null, null, null, "B", "C", null, "D", "F", null, "C"])",
+      R"([null, null, "BJ", null, null, null, "NJ", "NY", null, "SH", "SH",
+null, "SZ"])", R"([null, null, "A", null, null, null, "B", "C", null, "D", "F",
+null, "C"])",
       "[null, null, 10, null, null, null, 8, 13, null, 11, 11, null, 12]",
-      R"([null, null, "bj", "hz", "jh", "kk", "nj", "ny", "ph", "sh", "sh", "sz", "sz"])",
+      R"([null, null, "bj", "hz", "jh", "kk", "nj", "ny", "ph", "sh", "sh",
+"sz", "sz"])",
       "[4, 8, 10, 6, 9, 10, 8, 13, 7, 11, 11, 2, 12]"};
-  auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0, table1_f1});
-  MakeInputBatch(expected_result_string, res_sch, &expected_result);
+  auto res_sch = arrow::schema({table0_f0, table0_f1, table0_f2, table1_f0,
+table1_f1}); MakeInputBatch(expected_result_string, res_sch, &expected_result);
   expected_table.push_back(expected_result);
 
   ////////////////////// evaluate //////////////////////
@@ -3265,57 +3258,52 @@ TEST(TestArrowComputeWSCG, WSCGTestContinuousMergeJoinSemiExistence) {
   auto f_res = field("res", uint32());
   auto n_semi_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema",
-      {TreeExprBuilder::MakeField(table0_f0), TreeExprBuilder::MakeField(table0_f1),
-       TreeExprBuilder::MakeField(table0_f2)},
+      {TreeExprBuilder::MakeField(table0_f0),
+TreeExprBuilder::MakeField(table0_f1), TreeExprBuilder::MakeField(table0_f2)},
       uint32());
   auto n_semi_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32());
 
   auto n_semi_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
-  auto n_semi_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_semi_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
-  auto n_semi_probeArrays = TreeExprBuilder::MakeFunction(
-      "conditionedMergeJoinSemi",
-      {n_semi_left, n_semi_right, n_semi_left_key, n_semi_right_key, n_semi_result},
-      uint32());
-  auto n_semi_child =
-      TreeExprBuilder::MakeFunction("child", {n_semi_probeArrays}, uint32());
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table0_f0)},
+uint32()); auto n_semi_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_semi_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32()); auto n_semi_probeArrays =
+TreeExprBuilder::MakeFunction( "conditionedMergeJoinSemi", {n_semi_left,
+n_semi_right, n_semi_left_key, n_semi_right_key, n_semi_result}, uint32()); auto
+n_semi_child = TreeExprBuilder::MakeFunction("child", {n_semi_probeArrays},
+uint32());
 
   //////////////////////////////////////////////////////////////////
   auto n_existence_left = TreeExprBuilder::MakeFunction(
       "codegen_left_schema", {TreeExprBuilder::MakeField(table2_f0)}, uint32());
   auto n_existence_right = TreeExprBuilder::MakeFunction(
       "codegen_right_schema",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1)},
-      uint32());
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1)}, uint32());
 
   auto n_existence_left_key = TreeExprBuilder::MakeFunction(
-      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table2_f0)}, uint32());
-  auto n_existence_right_key = TreeExprBuilder::MakeFunction(
-      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
-  auto n_existence_result = TreeExprBuilder::MakeFunction(
-      "result",
-      {TreeExprBuilder::MakeField(table1_f0), TreeExprBuilder::MakeField(table1_f1),
-       TreeExprBuilder::MakeField(f_exist)},
+      "codegen_left_key_schema", {TreeExprBuilder::MakeField(table2_f0)},
+uint32()); auto n_existence_right_key = TreeExprBuilder::MakeFunction(
+      "codegen_right_key_schema", {TreeExprBuilder::MakeField(table1_f0)},
+uint32()); auto n_existence_result = TreeExprBuilder::MakeFunction( "result",
+      {TreeExprBuilder::MakeField(table1_f0),
+TreeExprBuilder::MakeField(table1_f1), TreeExprBuilder::MakeField(f_exist)},
       uint32());
   auto n_existence_probeArrays = TreeExprBuilder::MakeFunction(
       "conditionedMergeJoinExistence",
-      {n_existence_left, n_existence_right, n_existence_left_key, n_existence_right_key,
-       n_existence_result},
-      uint32());
-  auto n_existence_child = TreeExprBuilder::MakeFunction(
-      "child", {n_existence_probeArrays, n_semi_child}, uint32());
+      {n_existence_left, n_existence_right, n_existence_left_key,
+n_existence_right_key, n_existence_result}, uint32()); auto n_existence_child =
+TreeExprBuilder::MakeFunction( "child", {n_existence_probeArrays, n_semi_child},
+uint32());
   //////////////////////////////////////////////////////////////
   auto n_wscg =
-      TreeExprBuilder::MakeFunction("wholestagecodegen", {n_existence_child}, uint32());
-  auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
+      TreeExprBuilder::MakeFunction("wholestagecodegen", {n_existence_child},
+uint32()); auto mergeJoin_expr = TreeExprBuilder::MakeExpression(n_wscg, f_res);
 
   auto schema_table_0 = arrow::schema({table0_f0, table0_f1, table0_f2});
   auto schema_table_1 = arrow::schema({table1_f0, table1_f1});
@@ -3323,16 +3311,16 @@ TEST(TestArrowComputeWSCG, WSCGTestContinuousMergeJoinSemiExistence) {
   std::shared_ptr<CodeGenerator> expr_join;
   arrow::compute::ExecContext ctx;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), arrow::schema({}),
-                                    {mergeJoin_expr}, {table1_f0, table1_f1, f_exist},
-                                    &expr_join, true));
+                                    {mergeJoin_expr}, {table1_f0, table1_f1,
+f_exist}, &expr_join, true));
   /////////////// Sort Kernel ///////////////
   auto true_literal = TreeExprBuilder::MakeLiteral(true);
   auto false_literal = TreeExprBuilder::MakeLiteral(false);
-  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal}, uint32());
-  auto n_nulls_order =
-      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal}, uint32());
-  auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check", {false_literal}, uint32());
-  auto result_type = TreeExprBuilder::MakeFunction(
+  auto n_dir = TreeExprBuilder::MakeFunction("sort_directions", {true_literal},
+uint32()); auto n_nulls_order =
+      TreeExprBuilder::MakeFunction("sort_nulls_order", {true_literal},
+uint32()); auto NaN_check = TreeExprBuilder::MakeFunction("NaN_check",
+{false_literal}, uint32()); auto result_type = TreeExprBuilder::MakeFunction(
       "result_type", {TreeExprBuilder::MakeLiteral((int)1)}, uint32());
   auto n_key_func_left = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
@@ -3340,15 +3328,16 @@ TEST(TestArrowComputeWSCG, WSCGTestContinuousMergeJoinSemiExistence) {
       "key_field", {TreeExprBuilder::MakeField(table0_f0)}, uint32());
   auto n_sort_to_indices_left = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_left =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left}, uint32());
-  auto sortArrays_expr_left = TreeExprBuilder::MakeExpression(n_sort_left, f_res);
+      {n_key_func_left, n_key_field_left, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_left =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left},
+uint32()); auto sortArrays_expr_left =
+TreeExprBuilder::MakeExpression(n_sort_left, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left;
   ASSERT_NOT_OK(
-      CreateCodeGenerator(ctx.memory_pool(), schema_table_0, {sortArrays_expr_left},
-                          {table0_f0, table0_f1, table0_f2}, &expr_sort_left, true));
+      CreateCodeGenerator(ctx.memory_pool(), schema_table_0,
+{sortArrays_expr_left}, {table0_f0, table0_f1, table0_f2}, &expr_sort_left,
+true));
   ////////////////////////////////////////////////
   auto n_key_func_right = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
@@ -3356,15 +3345,15 @@ TEST(TestArrowComputeWSCG, WSCGTestContinuousMergeJoinSemiExistence) {
       "key_field", {TreeExprBuilder::MakeField(table1_f0)}, uint32());
   auto n_sort_to_indices_right = TreeExprBuilder::MakeFunction(
       "sortArraysToIndices",
-      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check, result_type},
-      uint32());
-  auto n_sort_right =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right}, uint32());
-  auto sortArrays_expr_right = TreeExprBuilder::MakeExpression(n_sort_right, f_res);
+      {n_key_func_right, n_key_field_right, n_dir, n_nulls_order, NaN_check,
+result_type}, uint32()); auto n_sort_right =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_right},
+uint32()); auto sortArrays_expr_right =
+TreeExprBuilder::MakeExpression(n_sort_right, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_right;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_1,
-                                    {sortArrays_expr_right}, {table1_f0, table1_f1},
-                                    &expr_sort_right, true));
+                                    {sortArrays_expr_right}, {table1_f0,
+table1_f1}, &expr_sort_right, true));
   ////////////////////////////////////////////////
   auto n_key_func_left_2 = TreeExprBuilder::MakeFunction(
       "key_function", {TreeExprBuilder::MakeField(table2_f0)}, uint32());
@@ -3372,12 +3361,11 @@ TEST(TestArrowComputeWSCG, WSCGTestContinuousMergeJoinSemiExistence) {
       "key_field", {TreeExprBuilder::MakeField(table2_f0)}, uint32());
   auto n_sort_to_indices_left_2 =
       TreeExprBuilder::MakeFunction("sortArraysToIndices",
-                                    {n_key_func_left_2, n_key_field_left_2, n_dir,
-                                     n_nulls_order, NaN_check, result_type},
-                                    uint32());
-  auto n_sort_left_2 =
-      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left_2}, uint32());
-  auto sortArrays_expr_left_2 = TreeExprBuilder::MakeExpression(n_sort_left_2, f_res);
+                                    {n_key_func_left_2, n_key_field_left_2,
+n_dir, n_nulls_order, NaN_check, result_type}, uint32()); auto n_sort_left_2 =
+      TreeExprBuilder::MakeFunction("standalone", {n_sort_to_indices_left_2},
+uint32()); auto sortArrays_expr_left_2 =
+TreeExprBuilder::MakeExpression(n_sort_left_2, f_res);
   std::shared_ptr<CodeGenerator> expr_sort_left_2;
   ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), schema_table_2,
                                     {sortArrays_expr_left_2}, {table2_f0},
@@ -3403,8 +3391,8 @@ TEST(TestArrowComputeWSCG, WSCGTestContinuousMergeJoinSemiExistence) {
   table_0.push_back(input_batch);
 
   std::vector<std::string> input_data_1_string = {"[1, 3, 4, 5, 6]",
-                                                  R"(["BJ", "TY", "NY", "SH", "HZ"])"};
-  MakeInputBatch(input_data_1_string, schema_table_1, &input_batch);
+                                                  R"(["BJ", "TY", "NY", "SH", "HZ"])"};
+  MakeInputBatch(input_data_1_string, schema_table_1, &input_batch);
   table_1.push_back(input_batch);
 
   input_data_1_string = {"[7, 8, 9, 10, 11, 12]",
@@ -3808,17 +3796,23 @@ TEST(TestArrowComputeWSCG, WSCGTestAggregate) {
 
   std::vector<std::string> input_data = {
       "[1, 2, 3, 4, 5, null, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]"};
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]"};
   MakeInputBatch(input_data, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
 
   std::vector<std::string> input_data_2 = {
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
-      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, 46]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]"};
+      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, "
+      "46]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]"};
   MakeInputBatch(input_data_2, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
 
@@ -3918,9 +3912,12 @@ TEST(TestArrowComputeWSCG, WSCGTestGroupbyHashAggregateTwoKeys) {
   std::vector<std::string> input_data = {
       "[1, 2, 3, 4, 5, null, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
       "[1, 2, 3, 4, 5, 5, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
       R"(["BJ", "SH", "HZ", "BH", "NY", "SH", "BH", "BJ", "SH", "SH", "BJ", "BJ", "BJ", "BH", "BH", "HZ", "NY", "NY", "NY", "NY"])"};
   MakeInputBatch(input_data, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
@@ -3928,9 +3925,12 @@ TEST(TestArrowComputeWSCG, WSCGTestGroupbyHashAggregateTwoKeys) {
   std::vector<std::string> input_data_2 = {
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
-      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, 46]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
+      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, "
+      "46]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
       R"(["BJ", "SH", "TK", "SH", "PH", "PH", "SH", "BJ", "SH", "SH", "BJ", "BJ", "BJ", "SH", "SH", "TK", "PH", "PH", "PH", "PH"])"};
   MakeInputBatch(input_data_2, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
@@ -3945,8 +3945,10 @@ TEST(TestArrowComputeWSCG, WSCGTestGroupbyHashAggregateTwoKeys) {
       "[5, 3, 2, 4, 5, 1, 5, 3, 2, 4, 6]",
       "[1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10]",
       "[1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10]",
-      "[16.4, 6.5, 5, 5.875, 5.48, 0.4, 6.1, 6.61905, 3.0625, 2.63889, 2.06667]",
-      "[8.49255, 6.93137, 7.6489, 13.5708, 17.4668, 1.41421, 8.52779, 6.23633, 5.58903, "
+      "[16.4, 6.5, 5, 5.875, 5.48, 0.4, 6.1, 6.61905, 3.0625, 2.63889, "
+      "2.06667]",
+      "[8.49255, 6.93137, 7.6489, 13.5708, 17.4668, 1.41421, 8.52779, 6.23633, "
+      "5.58903, "
       "12.535, 24.3544]"};
   auto res_sch = arrow::schema(ret_types);
   MakeInputBatch(expected_result_string, res_sch, &expected_result);
@@ -4043,18 +4045,24 @@ TEST(TestArrowComputeWSCG, WSCGTestGroupbyHashAggregate) {
   std::vector<std::string> input_data = {
       "[1, 2, 3, 4, 5, null, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
       "[1, 2, 3, 4, 5, 5, 4, 1, 2, 2, 1, 1, 1, 4, 4, 3, 5, 5, 5, 5]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]"};
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]"};
   MakeInputBatch(input_data, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
 
   std::vector<std::string> input_data_2 = {
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
       "[6, 7, 8, 9, 10, 10, 9, 6, 7, 7, 6, 6, 6, 9, 9, 8, 10, 10, 10, 10]",
-      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, 46]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]",
-      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, 1]"};
+      "[7, 8, 4, 5, 6, 1, 34, 54, 65, 66, 78, 12, 32, 24, 32, 45, 12, 24, 35, "
+      "46]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]",
+      "[2, 4, 5, 7, 8, 2, 45, 32, 23, 12, 14, 16, 18, 19, 23, 25, 57, 59, 12, "
+      "1]"};
   MakeInputBatch(input_data_2, sch, &input_batch);
   ASSERT_NOT_OK(aggr_result_iterator->ProcessAndCacheOne(input_batch->columns()));
 
@@ -4065,8 +4073,10 @@ TEST(TestArrowComputeWSCG, WSCGTestGroupbyHashAggregate) {
       "[1, 2, 3, 4, 5, null, 6, 7, 8, 9, 10]",
       "[25, 18, 12, 64, 125, 5, 150, 63, 32, 144, 360]",
       "[1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10]",
-      "[16.4, 6.5, 5, 5.875, 5.48, 0.4, 6.1, 6.61905, 3.0625, 2.63889, 2.06667]",
-      "[8.49255, 6.93137, 7.6489, 13.5708, 17.4668, 1.41421, 8.52779, 6.23633, 5.58903, "
+      "[16.4, 6.5, 5, 5.875, 5.48, 0.4, 6.1, 6.61905, 3.0625, 2.63889, "
+      "2.06667]",
+      "[8.49255, 6.93137, 7.6489, 13.5708, 17.4668, 1.41421, 8.52779, 6.23633, "
+      "5.58903, "
       "12.535, 24.3544]"};
   auto res_sch = arrow::schema(ret_types);
   MakeInputBatch(expected_result_string, res_sch, &expected_result);
diff --git a/native-sql-engine/cpp/src/tests/shuffle_split_test.cc b/native-sql-engine/cpp/src/tests/shuffle_split_test.cc
index 73a14a37e..42e86b42b 100644
--- a/native-sql-engine/cpp/src/tests/shuffle_split_test.cc
+++ b/native-sql-engine/cpp/src/tests/shuffle_split_test.cc
@@ -15,8 +15,6 @@
  * limitations under the License.
  */
 
-#include <iostream>
-
 #include <arrow/compute/api.h>
 #include <arrow/io/api.h>
 #include <arrow/ipc/reader.h>
@@ -25,6 +23,8 @@
 #include <arrow/util/io_util.h>
 #include <gtest/gtest.h>
 
+#include <iostream>
+
 #include "shuffle/splitter.h"
 #include "tests/test_utils.h"
 
@@ -124,7 +124,7 @@ class SplitterTest : public ::testing::Test {
     auto cntx = arrow::compute::ExecContext();
     std::shared_ptr<arrow::RecordBatch> res;
     auto maybe_res = arrow::compute::Take(*input_batch, *take_idx,
-                                       arrow::compute::TakeOptions{}, &cntx);
+                                          arrow::compute::TakeOptions{}, &cntx);
     res = *std::move(maybe_res);
     return res;
   }
@@ -427,7 +427,7 @@ TEST_F(SplitterTest, TestSpillFailWithOutOfMemory) {
 
 TEST_F(SplitterTest, TestSpillLargestPartition) {
   std::shared_ptr<arrow::MemoryPool> pool = std::make_shared<MyMemoryPool>(4000);
-//  pool = std::make_shared<arrow::LoggingMemoryPool>(pool.get());
+  //  pool = std::make_shared<arrow::LoggingMemoryPool>(pool.get());
 
   int32_t num_partitions = 2;
   split_options_.buffer_size = 4;
diff --git a/native-sql-engine/cpp/src/tests/test_utils.h b/native-sql-engine/cpp/src/tests/test_utils.h
index dae602caa..26f25ef65 100644
--- a/native-sql-engine/cpp/src/tests/test_utils.h
+++ b/native-sql-engine/cpp/src/tests/test_utils.h
@@ -25,9 +25,11 @@
 #include <arrow/type.h>
 #include <gandiva/node.h>
 #include <gandiva/tree_expr_builder.h>
+
 #include <iostream>
 #include <memory>
 #include <sstream>
+
 #include "utils/macros.h"
 using namespace arrow;
 
diff --git a/native-sql-engine/cpp/src/third_party/arrow/utils/hashing.h b/native-sql-engine/cpp/src/third_party/arrow/utils/hashing.h
index 28c273fea..f0344ec34 100644
--- a/native-sql-engine/cpp/src/third_party/arrow/utils/hashing.h
+++ b/native-sql-engine/cpp/src/third_party/arrow/utils/hashing.h
@@ -70,8 +70,8 @@ struct ScalarHelperBase {
     // Generic hash computation for scalars.  Simply apply the string hash
     // to the bit representation of the value.
 
-    // XXX in the case of FP values, we'd like equal values to have the same hash,
-    // even if they have different bit representations...
+    // XXX in the case of FP values, we'd like equal values to have the same
+    // hash, even if they have different bit representations...
     return ComputeStringHash<AlgNum>(&value, sizeof(value));
   }
 };
@@ -94,7 +94,8 @@ struct ScalarHelper<Scalar, AlgNum, enable_if_t<std::is_integral<Scalar>::value>
 
     // Multiplying by the prime number mixes the low bits into the high bits,
     // then byte-swapping (which is a single CPU instruction) allows the
-    // combined high and low bits to participate in the initial hash table index.
+    // combined high and low bits to participate in the initial hash table
+    // index.
     auto h = static_cast<hash_t>(value);
     return BitUtil::ByteSwap(multipliers[AlgNum] * h);
   }
@@ -167,9 +168,9 @@ hash_t ComputeStringHash(const void* data, int64_t length) {
 #error XXH3_SECRET_SIZE_MIN changed, please fix kXxh3Secrets
 #endif
 
-  // XXH3_64bits_withSeed generates a secret based on the seed, which is too slow.
-  // Instead, we use hard-coded random secrets.  To maximize cache efficiency,
-  // they reuse the same memory area.
+  // XXH3_64bits_withSeed generates a secret based on the seed, which is too
+  // slow. Instead, we use hard-coded random secrets.  To maximize cache
+  // efficiency, they reuse the same memory area.
   static constexpr unsigned char kXxh3Secrets[XXH3_SECRET_SIZE_MIN + 1] = {
       0xe7, 0x8b, 0x13, 0xf9, 0xfc, 0xb5, 0x8e, 0xef, 0x81, 0x48, 0x2c, 0xbf, 0xf9, 0x9f,
       0xc1, 0x1e, 0x43, 0x6d, 0xbf, 0xa6, 0x6d, 0xb5, 0x72, 0xbc, 0x97, 0xd8, 0x61, 0x24,
diff --git a/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxh3.h b/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxh3.h
index d06cc66a6..825f4cb96 100644
--- a/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxh3.h
+++ b/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxh3.h
@@ -56,745 +56,777 @@
 #ifndef XXH3_H
 #define XXH3_H
 
-
 /* ===   Dependencies   === */
 
-#undef XXH_INLINE_ALL   /* in case it's already defined */
+#undef XXH_INLINE_ALL /* in case it's already defined */
 #define XXH_INLINE_ALL
 #include "xxhash.h"
 
-
 /* ===   Compiler specifics   === */
 
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
-#  define XXH_RESTRICT   restrict
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
+#define XXH_RESTRICT restrict
 #else
-/* note : it might be useful to define __restrict or __restrict__ for some C++ compilers */
-#  define XXH_RESTRICT   /* disable */
+/* note : it might be useful to define __restrict or __restrict__ for some C++
+ * compilers */
+#define XXH_RESTRICT /* disable */
 #endif
 
 #if defined(__GNUC__)
-#  if defined(__AVX2__)
-#    include <immintrin.h>
-#  elif defined(__SSE2__)
-#    include <emmintrin.h>
-#  elif defined(__ARM_NEON__) || defined(__ARM_NEON)
-#    define inline __inline__  /* clang bug */
-#    include <arm_neon.h>
-#    undef inline
-#  endif
+#if defined(__AVX2__)
+#include <immintrin.h>
+#elif defined(__SSE2__)
+#include <emmintrin.h>
+#elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+#define inline __inline__ /* clang bug */
+#include <arm_neon.h>
+#undef inline
+#endif
 #elif defined(_MSC_VER)
-#  include <intrin.h>
+#include <intrin.h>
 #endif
 
-
-
 /* ==========================================
  * Vectorization detection
  * ========================================== */
 #define XXH_SCALAR 0
-#define XXH_SSE2   1
-#define XXH_AVX2   2
-#define XXH_NEON   3
-#define XXH_VSX    4
-
-#ifndef XXH_VECTOR    /* can be defined on command line */
-#  if defined(__AVX2__)
-#    define XXH_VECTOR XXH_AVX2
-#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
-#    define XXH_VECTOR XXH_SSE2
-#  elif defined(__GNUC__) /* msvc support maybe later */ \
-  && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
-  && defined(__LITTLE_ENDIAN__) /* ARM big endian is a thing */
-#    define XXH_VECTOR XXH_NEON
-#  elif defined(__PPC64__) && defined(__VSX__) && defined(__GNUC__)
-#    define XXH_VECTOR XXH_VSX
-#  else
-#    define XXH_VECTOR XXH_SCALAR
-#  endif
+#define XXH_SSE2 1
+#define XXH_AVX2 2
+#define XXH_NEON 3
+#define XXH_VSX 4
+
+#ifndef XXH_VECTOR /* can be defined on command line */
+#if defined(__AVX2__)
+#define XXH_VECTOR XXH_AVX2
+#elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || \
+    (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+#define XXH_VECTOR XXH_SSE2
+#elif defined(__GNUC__) /* msvc support maybe later */   \
+    && (defined(__ARM_NEON__) || defined(__ARM_NEON)) && \
+    defined(__LITTLE_ENDIAN__) /* ARM big endian is a thing */
+#define XXH_VECTOR XXH_NEON
+#elif defined(__PPC64__) && defined(__VSX__) && defined(__GNUC__)
+#define XXH_VECTOR XXH_VSX
+#else
+#define XXH_VECTOR XXH_SCALAR
+#endif
 #endif
 
 /* control alignment of accumulator,
  * for compatibility with fast vector loads */
 #ifndef XXH_ACC_ALIGN
-#  if XXH_VECTOR == 0   /* scalar */
-#     define XXH_ACC_ALIGN 8
-#  elif XXH_VECTOR == 1  /* sse2 */
-#     define XXH_ACC_ALIGN 16
-#  elif XXH_VECTOR == 2  /* avx2 */
-#     define XXH_ACC_ALIGN 32
-#  elif XXH_VECTOR == 3  /* neon */
-#     define XXH_ACC_ALIGN 16
-#  elif XXH_VECTOR == 4  /* vsx */
-#     define XXH_ACC_ALIGN 16
-#  endif
+#if XXH_VECTOR == 0 /* scalar */
+#define XXH_ACC_ALIGN 8
+#elif XXH_VECTOR == 1 /* sse2 */
+#define XXH_ACC_ALIGN 16
+#elif XXH_VECTOR == 2 /* avx2 */
+#define XXH_ACC_ALIGN 32
+#elif XXH_VECTOR == 3 /* neon */
+#define XXH_ACC_ALIGN 16
+#elif XXH_VECTOR == 4 /* vsx */
+#define XXH_ACC_ALIGN 16
+#endif
 #endif
 
 /* U64 XXH_mult32to64(U32 a, U64 b) { return (U64)a * (U64)b; } */
 #if defined(_MSC_VER) && defined(_M_IX86)
-#    include <intrin.h>
-#    define XXH_mult32to64(x, y) __emulu(x, y)
+#include <intrin.h>
+#define XXH_mult32to64(x, y) __emulu(x, y)
 #else
-#    define XXH_mult32to64(x, y) ((U64)((x) & 0xFFFFFFFF) * (U64)((y) & 0xFFFFFFFF))
+#define XXH_mult32to64(x, y) ((U64)((x)&0xFFFFFFFF) * (U64)((y)&0xFFFFFFFF))
 #endif
 
 /* VSX stuff */
 #if XXH_VECTOR == XXH_VSX
-#  include <altivec.h>
-#  undef vector
+#include <altivec.h>
+#undef vector
 typedef __vector unsigned long long U64x2;
 typedef __vector unsigned U32x4;
-/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
+/* Adapted from
+ * https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
 XXH_FORCE_INLINE U64x2 XXH_vsxMultOdd(U32x4 a, U32x4 b) {
-    U64x2 result;
-    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
-    return result;
+  U64x2 result;
+  __asm__("vmulouw %0, %1, %2" : "=v"(result) : "v"(a), "v"(b));
+  return result;
 }
 XXH_FORCE_INLINE U64x2 XXH_vsxMultEven(U32x4 a, U32x4 b) {
-    U64x2 result;
-    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
-    return result;
+  U64x2 result;
+  __asm__("vmuleuw %0, %1, %2" : "=v"(result) : "v"(a), "v"(b));
+  return result;
 }
 #endif
 
-
 /* ==========================================
  * XXH3 default settings
  * ========================================== */
 
-#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */
+#define XXH_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
 
 #if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
-#  error "default keyset is not large enough"
+#error "default keyset is not large enough"
 #endif
 
-XXH_ALIGN(64) static const BYTE kSecret[XXH_SECRET_DEFAULT_SIZE] = {
-    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
-    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
-    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
-    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
-    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
-    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
-    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
-    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
-
-    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
-    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
-    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
-    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
+XXH_ALIGN(64)
+static const BYTE kSecret[XXH_SECRET_DEFAULT_SIZE] = {
+    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7,
+    0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40,
+    0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5,
+    0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, 0xb8, 0x08, 0x46, 0x74,
+    0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, 0x3c,
+    0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53,
+    0x2e, 0xa3, 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef,
+    0x46, 0xa9, 0xde, 0xac, 0xd8, 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f,
+    0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, 0x8a, 0x51, 0xe0, 0x4b, 0xcd,
+    0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
+
+    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa,
+    0x13, 0x63, 0xeb, 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16,
+    0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1,
+    0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, 0x45, 0xcb, 0x3a, 0x8f,
+    0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
 };
 
+static XXH128_hash_t XXH3_mul128(U64 ll1, U64 ll2) {
+/* __uint128_t seems a bad choice with current emscripten, see
+ * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 */
+#if !defined(__wasm__) && defined(__SIZEOF_INT128__) || \
+    (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
 
-static XXH128_hash_t
-XXH3_mul128(U64 ll1, U64 ll2)
-{
-/* __uint128_t seems a bad choice with emscripten current, see https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 */
-#if !defined(__wasm__) && defined(__SIZEOF_INT128__) || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
-
-    __uint128_t lll = (__uint128_t)ll1 * ll2;
-    XXH128_hash_t const r128 = { (U64)(lll), (U64)(lll >> 64) };
-    return r128;
+  __uint128_t lll = (__uint128_t)ll1 * ll2;
+  XXH128_hash_t const r128 = {(U64)(lll), (U64)(lll >> 64)};
+  return r128;
 
 #elif defined(_M_X64) || defined(_M_IA64)
 
 #ifndef _MSC_VER
-#   pragma intrinsic(_umul128)
+#pragma intrinsic(_umul128)
 #endif
-    U64 llhigh;
-    U64 const lllow = _umul128(ll1, ll2, &llhigh);
-    XXH128_hash_t const r128 = { lllow, llhigh };
-    return r128;
+  U64 llhigh;
+  U64 const lllow = _umul128(ll1, ll2, &llhigh);
+  XXH128_hash_t const r128 = {lllow, llhigh};
+  return r128;
 
 #else /* Portable scalar version */
 
-    /* emulate 64x64->128b multiplication, using four 32x32->64 */
-    U32 const h1 = (U32)(ll1 >> 32);
-    U32 const h2 = (U32)(ll2 >> 32);
-    U32 const l1 = (U32)ll1;
-    U32 const l2 = (U32)ll2;
+  /* emulate 64x64->128b multiplication, using four 32x32->64 */
+  U32 const h1 = (U32)(ll1 >> 32);
+  U32 const h2 = (U32)(ll2 >> 32);
+  U32 const l1 = (U32)ll1;
+  U32 const l2 = (U32)ll2;
 
-    U64 const llh  = XXH_mult32to64(h1, h2);
-    U64 const llm1 = XXH_mult32to64(l1, h2);
-    U64 const llm2 = XXH_mult32to64(h1, l2);
-    U64 const lll  = XXH_mult32to64(l1, l2);
+  U64 const llh = XXH_mult32to64(h1, h2);
+  U64 const llm1 = XXH_mult32to64(l1, h2);
+  U64 const llm2 = XXH_mult32to64(h1, l2);
+  U64 const lll = XXH_mult32to64(l1, l2);
 
-    U64 const t = lll + (llm1 << 32);
-    U64 const carry1 = t < lll;
+  U64 const t = lll + (llm1 << 32);
+  U64 const carry1 = t < lll;
 
-    U64 const lllow = t + (llm2 << 32);
-    U64 const carry2 = lllow < t;
-    U64 const llhigh = llh + (llm1 >> 32) + (llm2 >> 32) + carry1 + carry2;
+  U64 const lllow = t + (llm2 << 32);
+  U64 const carry2 = lllow < t;
+  U64 const llhigh = llh + (llm1 >> 32) + (llm2 >> 32) + carry1 + carry2;
 
-    XXH128_hash_t const r128 = { lllow, llhigh };
-    return r128;
+  XXH128_hash_t const r128 = {lllow, llhigh};
+  return r128;
 
 #endif
 }
 
-
 #if defined(__GNUC__) && defined(__i386__)
 /* GCC is stupid and tries to vectorize this.
  * This tells GCC that it is wrong. */
 __attribute__((__target__("no-sse")))
 #endif
 static U64
-XXH3_mul128_fold64(U64 ll1, U64 ll2)
-{
-/* __uint128_t seems a bad choice with emscripten current, see https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 */
-#if !defined(__wasm__) && defined(__SIZEOF_INT128__) || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
+XXH3_mul128_fold64(U64 ll1, U64 ll2) {
+/* __uint128_t seems a bad choice with current emscripten, see
+ * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 */
+#if !defined(__wasm__) && defined(__SIZEOF_INT128__) || \
+    (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
 
-    __uint128_t lll = (__uint128_t)ll1 * ll2;
-    return (U64)lll ^ (U64)(lll >> 64);
+  __uint128_t lll = (__uint128_t)ll1 * ll2;
+  return (U64)lll ^ (U64)(lll >> 64);
 
 #elif defined(_M_X64) || defined(_M_IA64)
 
 #ifndef _MSC_VER
-#   pragma intrinsic(_umul128)
+#pragma intrinsic(_umul128)
 #endif
-    U64 llhigh;
-    U64 const lllow = _umul128(ll1, ll2, &llhigh);
-    return lllow ^ llhigh;
-
-    /* We have to do it out manually on 32-bit.
-     * This is a modified, unrolled, widened, and optimized version of the
-     * mulqdu routine from Hacker's Delight.
-     *
-     *   https://www.hackersdelight.org/hdcodetxt/mulqdu.c.txt
-     *
-     * This was modified to use U32->U64 multiplication instead
-     * of U16->U32, to add the high and low values in the end,
-     * be endian-independent, and I added a partial assembly
-     * implementation for ARM. */
-
-    /* An easy 128-bit folding multiply on ARMv6T2 and ARMv7-A/R can be done with
-     * the mighty umaal (Unsigned Multiply Accumulate Accumulate Long) which takes 4 cycles
-     * or less, doing a long multiply and adding two 32-bit integers:
-     *
-     *     void umaal(U32 *RdLo, U32 *RdHi, U32 Rn, U32 Rm)
-     *     {
-     *         U64 prodAcc = (U64)Rn * (U64)Rm;
-     *         prodAcc += *RdLo;
-     *         prodAcc += *RdHi;
-     *         *RdLo = prodAcc & 0xFFFFFFFF;
-     *         *RdHi = prodAcc >> 32;
-     *     }
-     *
-     * This is compared to umlal which adds to a single 64-bit integer:
-     *
-     *     void umlal(U32 *RdLo, U32 *RdHi, U32 Rn, U32 Rm)
-     *     {
-     *         U64 prodAcc = (U64)Rn * (U64)Rm;
-     *         prodAcc += (*RdLo | ((U64)*RdHi << 32);
-     *         *RdLo = prodAcc & 0xFFFFFFFF;
-     *         *RdHi = prodAcc >> 32;
-     *     }
-     *
-     * Getting the compiler to emit them is like pulling teeth, and checking
-     * for it is annoying because ARMv7-M lacks this instruction. However, it
-     * is worth it, because this is an otherwise expensive operation. */
-
-     /* GCC-compatible, ARMv6t2 or ARMv7+, non-M variant, and 32-bit */
-#elif defined(__GNUC__) /* GCC-compatible */ \
-    && defined(__ARM_ARCH) && !defined(__aarch64__) && !defined(__arm64__) /* 32-bit ARM */\
-    && !defined(__ARM_ARCH_7M__) /* <- Not ARMv7-M  vv*/ \
-        && !(defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM == 0 && __TARGET_ARCH_THUMB == 4) \
-    && (defined(__ARM_ARCH_6T2__) || __ARM_ARCH > 6) /* ARMv6T2 or later */
-
-    U32 w[4] = { 0 };
-    U32 u[2] = { (U32)(ll1 >> 32), (U32)ll1 };
-    U32 v[2] = { (U32)(ll2 >> 32), (U32)ll2 };
-    U32 k;
-
-    /* U64 t = (U64)u[1] * (U64)v[1];
-     * w[3] = t & 0xFFFFFFFF;
-     * k = t >> 32; */
-    __asm__("umull %0, %1, %2, %3"
-            : "=r" (w[3]), "=r" (k)
-            : "r" (u[1]), "r" (v[1]));
-
-    /* t = (U64)u[0] * (U64)v[1] + w[2] + k;
-     * w[2] = t & 0xFFFFFFFF;
-     * k = t >> 32; */
-    __asm__("umaal %0, %1, %2, %3"
-            : "+r" (w[2]), "+r" (k)
-            : "r" (u[0]), "r" (v[1]));
-    w[1] = k;
-    k = 0;
-
-    /* t = (U64)u[1] * (U64)v[0] + w[2] + k;
-     * w[2] = t & 0xFFFFFFFF;
-     * k = t >> 32; */
-    __asm__("umaal %0, %1, %2, %3"
-            : "+r" (w[2]), "+r" (k)
-            : "r" (u[1]), "r" (v[0]));
-
-    /* t = (U64)u[0] * (U64)v[0] + w[1] + k;
-     * w[1] = t & 0xFFFFFFFF;
-     * k = t >> 32; */
-    __asm__("umaal %0, %1, %2, %3"
-            : "+r" (w[1]), "+r" (k)
-            : "r" (u[0]), "r" (v[0]));
-    w[0] = k;
-
-    return (w[1] | ((U64)w[0] << 32)) ^ (w[3] | ((U64)w[2] << 32));
+  U64 llhigh;
+  U64 const lllow = _umul128(ll1, ll2, &llhigh);
+  return lllow ^ llhigh;
+
+  /* We have to do it out manually on 32-bit.
+   * This is a modified, unrolled, widened, and optimized version of the
+   * mulqdu routine from Hacker's Delight.
+   *
+   *   https://www.hackersdelight.org/hdcodetxt/mulqdu.c.txt
+   *
+   * This was modified to use U32->U64 multiplication instead
+   * of U16->U32, to add the high and low values in the end,
+   * be endian-independent, and I added a partial assembly
+   * implementation for ARM. */
+
+  /* An easy 128-bit folding multiply on ARMv6T2 and ARMv7-A/R can be done with
+   * the mighty umaal (Unsigned Multiply Accumulate Accumulate Long) which takes
+   * 4 cycles or less, doing a long multiply and adding two 32-bit integers:
+   *
+   *     void umaal(U32 *RdLo, U32 *RdHi, U32 Rn, U32 Rm)
+   *     {
+   *         U64 prodAcc = (U64)Rn * (U64)Rm;
+   *         prodAcc += *RdLo;
+   *         prodAcc += *RdHi;
+   *         *RdLo = prodAcc & 0xFFFFFFFF;
+   *         *RdHi = prodAcc >> 32;
+   *     }
+   *
+   * This is compared to umlal which adds to a single 64-bit integer:
+   *
+   *     void umlal(U32 *RdLo, U32 *RdHi, U32 Rn, U32 Rm)
+   *     {
+   *         U64 prodAcc = (U64)Rn * (U64)Rm;
+   *         prodAcc += (*RdLo | ((U64)*RdHi << 32));
+   *         *RdLo = prodAcc & 0xFFFFFFFF;
+   *         *RdHi = prodAcc >> 32;
+   *     }
+   *
+   * Getting the compiler to emit them is like pulling teeth, and checking
+   * for it is annoying because ARMv7-M lacks this instruction. However, it
+   * is worth it, because this is an otherwise expensive operation. */
+
+  /* GCC-compatible, ARMv6t2 or ARMv7+, non-M variant, and 32-bit */
+#elif defined(__GNUC__) /* GCC-compatible */                     \
+    && defined(__ARM_ARCH) && !defined(__aarch64__) &&           \
+    !defined(__arm64__)          /* 32-bit ARM */                \
+    && !defined(__ARM_ARCH_7M__) /* <- Not ARMv7-M  vv*/         \
+    && !(defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM == 0 && \
+         __TARGET_ARCH_THUMB == 4) &&                            \
+    (defined(__ARM_ARCH_6T2__) || __ARM_ARCH > 6) /* ARMv6T2 or later */
+
+  U32 w[4] = {0};
+  U32 u[2] = {(U32)(ll1 >> 32), (U32)ll1};
+  U32 v[2] = {(U32)(ll2 >> 32), (U32)ll2};
+  U32 k;
+
+  /* U64 t = (U64)u[1] * (U64)v[1];
+   * w[3] = t & 0xFFFFFFFF;
+   * k = t >> 32; */
+  __asm__("umull %0, %1, %2, %3" : "=r"(w[3]), "=r"(k) : "r"(u[1]), "r"(v[1]));
+
+  /* t = (U64)u[0] * (U64)v[1] + w[2] + k;
+   * w[2] = t & 0xFFFFFFFF;
+   * k = t >> 32; */
+  __asm__("umaal %0, %1, %2, %3" : "+r"(w[2]), "+r"(k) : "r"(u[0]), "r"(v[1]));
+  w[1] = k;
+  k = 0;
+
+  /* t = (U64)u[1] * (U64)v[0] + w[2] + k;
+   * w[2] = t & 0xFFFFFFFF;
+   * k = t >> 32; */
+  __asm__("umaal %0, %1, %2, %3" : "+r"(w[2]), "+r"(k) : "r"(u[1]), "r"(v[0]));
+
+  /* t = (U64)u[0] * (U64)v[0] + w[1] + k;
+   * w[1] = t & 0xFFFFFFFF;
+   * k = t >> 32; */
+  __asm__("umaal %0, %1, %2, %3" : "+r"(w[1]), "+r"(k) : "r"(u[0]), "r"(v[0]));
+  w[0] = k;
+
+  return (w[1] | ((U64)w[0] << 32)) ^ (w[3] | ((U64)w[2] << 32));
 
 #else /* Portable scalar version */
 
-    /* emulate 64x64->128b multiplication, using four 32x32->64 */
-    U32 const h1 = (U32)(ll1 >> 32);
-    U32 const h2 = (U32)(ll2 >> 32);
-    U32 const l1 = (U32)ll1;
-    U32 const l2 = (U32)ll2;
+  /* emulate 64x64->128b multiplication, using four 32x32->64 */
+  U32 const h1 = (U32)(ll1 >> 32);
+  U32 const h2 = (U32)(ll2 >> 32);
+  U32 const l1 = (U32)ll1;
+  U32 const l2 = (U32)ll2;
 
-    U64 const llh  = XXH_mult32to64(h1, h2);
-    U64 const llm1 = XXH_mult32to64(l1, h2);
-    U64 const llm2 = XXH_mult32to64(h1, l2);
-    U64 const lll  = XXH_mult32to64(l1, l2);
+  U64 const llh = XXH_mult32to64(h1, h2);
+  U64 const llm1 = XXH_mult32to64(l1, h2);
+  U64 const llm2 = XXH_mult32to64(h1, l2);
+  U64 const lll = XXH_mult32to64(l1, l2);
 
-    U64 const t = lll + (llm1 << 32);
-    U64 const carry1 = t < lll;
+  U64 const t = lll + (llm1 << 32);
+  U64 const carry1 = t < lll;
 
-    U64 const lllow = t + (llm2 << 32);
-    U64 const carry2 = lllow < t;
-    U64 const llhigh = llh + (llm1 >> 32) + (llm2 >> 32) + carry1 + carry2;
+  U64 const lllow = t + (llm2 << 32);
+  U64 const carry2 = lllow < t;
+  U64 const llhigh = llh + (llm1 >> 32) + (llm2 >> 32) + carry1 + carry2;
 
-    return llhigh ^ lllow;
+  return llhigh ^ lllow;
 
 #endif
 }
 
-
-static XXH64_hash_t XXH3_avalanche(U64 h64)
-{
-    h64 ^= h64 >> 37;
-    h64 *= PRIME64_3;
-    h64 ^= h64 >> 32;
-    return h64;
+static XXH64_hash_t XXH3_avalanche(U64 h64) {
+  h64 ^= h64 >> 37;
+  h64 *= PRIME64_3;
+  h64 ^= h64 >> 32;
+  return h64;
 }
 
-
 /* ==========================================
  * Short keys
  * ========================================== */
 
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_1to3_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(data != NULL);
-    XXH_ASSERT(1 <= len && len <= 3);
-    XXH_ASSERT(keyPtr != NULL);
-    {   BYTE const c1 = ((const BYTE*)data)[0];
-        BYTE const c2 = ((const BYTE*)data)[len >> 1];
-        BYTE const c3 = ((const BYTE*)data)[len - 1];
-        U32  const combined = ((U32)c1) + (((U32)c2) << 8) + (((U32)c3) << 16) + (((U32)len) << 24);
-        U64  const keyed = (U64)combined ^ (XXH_readLE32(keyPtr) + seed);
-        U64  const mixed = keyed * PRIME64_1;
-        return XXH3_avalanche(mixed);
-    }
+XXH_FORCE_INLINE XXH64_hash_t XXH3_len_1to3_64b(const void* data, size_t len,
+                                                const void* keyPtr, XXH64_hash_t seed) {
+  XXH_ASSERT(data != NULL);
+  XXH_ASSERT(1 <= len && len <= 3);
+  XXH_ASSERT(keyPtr != NULL);
+  {
+    BYTE const c1 = ((const BYTE*)data)[0];
+    BYTE const c2 = ((const BYTE*)data)[len >> 1];
+    BYTE const c3 = ((const BYTE*)data)[len - 1];
+    U32 const combined =
+        ((U32)c1) + (((U32)c2) << 8) + (((U32)c3) << 16) + (((U32)len) << 24);
+    U64 const keyed = (U64)combined ^ (XXH_readLE32(keyPtr) + seed);
+    U64 const mixed = keyed * PRIME64_1;
+    return XXH3_avalanche(mixed);
+  }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_4to8_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(data != NULL);
-    XXH_ASSERT(keyPtr != NULL);
-    XXH_ASSERT(4 <= len && len <= 8);
-    {   U32 const in1 = XXH_readLE32(data);
-        U32 const in2 = XXH_readLE32((const BYTE*)data + len - 4);
-        U64 const in64 = in1 + ((U64)in2 << 32);
-        U64 const keyed = in64 ^ (XXH_readLE64(keyPtr) + seed);
-        U64 const mix64 = len + ((keyed ^ (keyed >> 51)) * PRIME32_1);
-        return XXH3_avalanche((mix64 ^ (mix64 >> 47)) * PRIME64_2);
-    }
+XXH_FORCE_INLINE XXH64_hash_t XXH3_len_4to8_64b(const void* data, size_t len,
+                                                const void* keyPtr, XXH64_hash_t seed) {
+  XXH_ASSERT(data != NULL);
+  XXH_ASSERT(keyPtr != NULL);
+  XXH_ASSERT(4 <= len && len <= 8);
+  {
+    U32 const in1 = XXH_readLE32(data);
+    U32 const in2 = XXH_readLE32((const BYTE*)data + len - 4);
+    U64 const in64 = in1 + ((U64)in2 << 32);
+    U64 const keyed = in64 ^ (XXH_readLE64(keyPtr) + seed);
+    U64 const mix64 = len + ((keyed ^ (keyed >> 51)) * PRIME32_1);
+    return XXH3_avalanche((mix64 ^ (mix64 >> 47)) * PRIME64_2);
+  }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_9to16_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(data != NULL);
-    XXH_ASSERT(keyPtr != NULL);
-    XXH_ASSERT(9 <= len && len <= 16);
-    {   const U64* const key64 = (const U64*) keyPtr;
-        U64 const ll1 = XXH_readLE64(data) ^ (XXH_readLE64(key64) + seed);
-        U64 const ll2 = XXH_readLE64((const BYTE*)data + len - 8) ^ (XXH_readLE64(key64+1) - seed);
-        U64 const acc = len + (ll1 + ll2) + XXH3_mul128_fold64(ll1, ll2);
-        return XXH3_avalanche(acc);
-    }
+XXH_FORCE_INLINE XXH64_hash_t XXH3_len_9to16_64b(const void* data, size_t len,
+                                                 const void* keyPtr, XXH64_hash_t seed) {
+  XXH_ASSERT(data != NULL);
+  XXH_ASSERT(keyPtr != NULL);
+  XXH_ASSERT(9 <= len && len <= 16);
+  {
+    const U64* const key64 = (const U64*)keyPtr;
+    U64 const ll1 = XXH_readLE64(data) ^ (XXH_readLE64(key64) + seed);
+    U64 const ll2 =
+        XXH_readLE64((const BYTE*)data + len - 8) ^ (XXH_readLE64(key64 + 1) - seed);
+    U64 const acc = len + (ll1 + ll2) + XXH3_mul128_fold64(ll1, ll2);
+    return XXH3_avalanche(acc);
+  }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_0to16_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(len <= 16);
-    {   if (len > 8) return XXH3_len_9to16_64b(data, len, keyPtr, seed);
-        if (len >= 4) return XXH3_len_4to8_64b(data, len, keyPtr, seed);
-        if (len) return XXH3_len_1to3_64b(data, len, keyPtr, seed);
-        return 0;
-    }
+XXH_FORCE_INLINE XXH64_hash_t XXH3_len_0to16_64b(const void* data, size_t len,
+                                                 const void* keyPtr, XXH64_hash_t seed) {
+  XXH_ASSERT(len <= 16);
+  {
+    if (len > 8) return XXH3_len_9to16_64b(data, len, keyPtr, seed);
+    if (len >= 4) return XXH3_len_4to8_64b(data, len, keyPtr, seed);
+    if (len) return XXH3_len_1to3_64b(data, len, keyPtr, seed);
+    return 0;
+  }
 }
 
-
 /* ===    Long Keys    === */
 
 #define STRIPE_LEN 64
-#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
+#define XXH_SECRET_CONSUME_RATE 8 /* nb of secret bytes consumed at each accumulation */
 #define ACC_NB (STRIPE_LEN / sizeof(U64))
 
 typedef enum { XXH3_acc_64bits, XXH3_acc_128bits } XXH3_accWidth_e;
 
-XXH_FORCE_INLINE void
-XXH3_accumulate_512(      void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT data,
-                    const void* XXH_RESTRICT key,
-                    XXH3_accWidth_e accWidth)
-{
+XXH_FORCE_INLINE void XXH3_accumulate_512(void* XXH_RESTRICT acc,
+                                          const void* XXH_RESTRICT data,
+                                          const void* XXH_RESTRICT key,
+                                          XXH3_accWidth_e accWidth) {
 #if (XXH_VECTOR == XXH_AVX2)
 
-    XXH_ASSERT((((size_t)acc) & 31) == 0);
-    {   XXH_ALIGN(32) __m256i* const xacc  =       (__m256i *) acc;
-        const         __m256i* const xdata = (const __m256i *) data;  /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */
-        const         __m256i* const xkey  = (const __m256i *) key;   /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this type */
-
-        size_t i;
-        for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) {
-            __m256i const d   = _mm256_loadu_si256 (xdata+i);
-            __m256i const k   = _mm256_loadu_si256 (xkey+i);
-            __m256i const dk  = _mm256_xor_si256 (d,k);                                  /* uint32 dk[8]  = {d0+k0, d1+k1, d2+k2, d3+k3, ...} */
-            __m256i const mul = _mm256_mul_epu32 (dk, _mm256_shuffle_epi32 (dk, 0x31));  /* uint64 mul[4] = {dk0*dk1, dk2*dk3, ...} */
-            if (accWidth == XXH3_acc_128bits) {
-                __m256i const dswap = _mm256_shuffle_epi32(d, _MM_SHUFFLE(1,0,3,2));
-                __m256i const add = _mm256_add_epi64(xacc[i], dswap);
-                xacc[i]  = _mm256_add_epi64(mul, add);
-            } else {  /* XXH3_acc_64bits */
-                __m256i const add = _mm256_add_epi64(xacc[i], d);
-                xacc[i]  = _mm256_add_epi64(mul, add);
-            }
-    }   }
+  XXH_ASSERT((((size_t)acc) & 31) == 0);
+  {
+    XXH_ALIGN(32) __m256i* const xacc = (__m256i*)acc;
+    const __m256i* const xdata =
+        (const __m256i*)data; /* not really aligned, just for ptr arithmetic, and because
+                                 _mm256_loadu_si256() requires this type */
+    const __m256i* const xkey =
+        (const __m256i*)key; /* not really aligned, just for ptr arithmetic, and because
+                                _mm256_loadu_si256() requires this type */
+
+    size_t i;
+    for (i = 0; i < STRIPE_LEN / sizeof(__m256i); i++) {
+      __m256i const d = _mm256_loadu_si256(xdata + i);
+      __m256i const k = _mm256_loadu_si256(xkey + i);
+      __m256i const dk =
+          _mm256_xor_si256(d, k); /* uint32 dk[8]  = {d0^k0, d1^k1, d2^k2, d3^k3, ...} */
+      __m256i const mul = _mm256_mul_epu32(
+          dk,
+          _mm256_shuffle_epi32(dk, 0x31)); /* uint64 mul[4] = {dk0*dk1, dk2*dk3, ...} */
+      if (accWidth == XXH3_acc_128bits) {
+        __m256i const dswap = _mm256_shuffle_epi32(d, _MM_SHUFFLE(1, 0, 3, 2));
+        __m256i const add = _mm256_add_epi64(xacc[i], dswap);
+        xacc[i] = _mm256_add_epi64(mul, add);
+      } else { /* XXH3_acc_64bits */
+        __m256i const add = _mm256_add_epi64(xacc[i], d);
+        xacc[i] = _mm256_add_epi64(mul, add);
+      }
+    }
+  }
 
 #elif (XXH_VECTOR == XXH_SSE2)
 
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {   XXH_ALIGN(16) __m128i* const xacc  =       (__m128i *) acc;   /* presumed */
-        const         __m128i* const xdata = (const __m128i *) data;  /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */
-        const         __m128i* const xkey  = (const __m128i *) key;   /* not really aligned, just for ptr arithmetic, and because _mm_loadu_si128() requires this type */
-
-        size_t i;
-        for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) {
-            __m128i const d   = _mm_loadu_si128 (xdata+i);
-            __m128i const k   = _mm_loadu_si128 (xkey+i);
-            __m128i const dk  = _mm_xor_si128 (d,k);                                 /* uint32 dk[4]  = {d0+k0, d1+k1, d2+k2, d3+k3} */
-            __m128i const mul = _mm_mul_epu32 (dk, _mm_shuffle_epi32 (dk, 0x31));    /* uint64 mul[2] = {dk0*dk1,dk2*dk3} */
-            if (accWidth == XXH3_acc_128bits) {
-                __m128i const dswap = _mm_shuffle_epi32(d, _MM_SHUFFLE(1,0,3,2));
-                __m128i const add = _mm_add_epi64(xacc[i], dswap);
-                xacc[i]  = _mm_add_epi64(mul, add);
-            } else {  /* XXH3_acc_64bits */
-                __m128i const add = _mm_add_epi64(xacc[i], d);
-                xacc[i]  = _mm_add_epi64(mul, add);
-            }
-    }   }
+  XXH_ASSERT((((size_t)acc) & 15) == 0);
+  {
+    XXH_ALIGN(16) __m128i* const xacc = (__m128i*)acc; /* presumed */
+    const __m128i* const xdata =
+        (const __m128i*)data; /* not really aligned, just for ptr arithmetic, and because
+                                 _mm_loadu_si128() requires this type */
+    const __m128i* const xkey =
+        (const __m128i*)key; /* not really aligned, just for ptr arithmetic, and
+                                because _mm_loadu_si128() requires this type */
+
+    size_t i;
+    for (i = 0; i < STRIPE_LEN / sizeof(__m128i); i++) {
+      __m128i const d = _mm_loadu_si128(xdata + i);
+      __m128i const k = _mm_loadu_si128(xkey + i);
+      __m128i const dk =
+          _mm_xor_si128(d, k); /* uint32 dk[4]  = {d0^k0, d1^k1, d2^k2, d3^k3} */
+      __m128i const mul = _mm_mul_epu32(
+          dk, _mm_shuffle_epi32(dk, 0x31)); /* uint64 mul[2] = {dk0*dk1,dk2*dk3} */
+      if (accWidth == XXH3_acc_128bits) {
+        __m128i const dswap = _mm_shuffle_epi32(d, _MM_SHUFFLE(1, 0, 3, 2));
+        __m128i const add = _mm_add_epi64(xacc[i], dswap);
+        xacc[i] = _mm_add_epi64(mul, add);
+      } else { /* XXH3_acc_64bits */
+        __m128i const add = _mm_add_epi64(xacc[i], d);
+        xacc[i] = _mm_add_epi64(mul, add);
+      }
+    }
+  }
 
 #elif (XXH_VECTOR == XXH_NEON)
 
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {
-        XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc;
-        /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
-        uint32_t const* const xdata = (const uint32_t *) data;
-        uint32_t const* const xkey  = (const uint32_t *) key;
-
-        size_t i;
-        for (i=0; i < STRIPE_LEN / sizeof(uint64x2_t); i++) {
-#if !defined(__aarch64__) && !defined(__arm64__) && defined(__GNUC__) /* ARM32-specific hack */
-            /* vzip on ARMv7 Clang generates a lot of vmovs (technically vorrs) without this.
-             * vzip on 32-bit ARM NEON will overwrite the original register, and I think that Clang
-             * assumes I don't want to destroy it and tries to make a copy. This slows down the code
-             * a lot.
-             * aarch64 not only uses an entirely different syntax, but it requires three
-             * instructions...
-             *    ext    v1.16B, v0.16B, #8    // select high bits because aarch64 can't address them directly
-             *    zip1   v3.2s, v0.2s, v1.2s   // first zip
-             *    zip2   v2.2s, v0.2s, v1.2s   // second zip
-             * ...to do what ARM does in one:
-             *    vzip.32 d0, d1               // Interleave high and low bits and overwrite. */
-
-            /* data_vec = xdata[i]; */
-            uint32x4_t const data_vec    = vld1q_u32(xdata + (i * 4));
-            /* key_vec  = xkey[i];  */
-            uint32x4_t const key_vec     = vld1q_u32(xkey  + (i * 4));
-            /* data_key = data_vec ^ key_vec; */
-            uint32x4_t       data_key;
-
-            if (accWidth == XXH3_acc_64bits) {
-                /* Add first to prevent register swaps */
-                /* xacc[i] += data_vec; */
-                xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u32(data_vec));
-            } else {  /* XXH3_acc_128bits */
-                /* xacc[i] += swap(data_vec); */
-                /* can probably be optimized better */
-                uint64x2_t const data64 = vreinterpretq_u64_u32(data_vec);
-                uint64x2_t const swapped= vextq_u64(data64, data64, 1);
-                xacc[i] = vaddq_u64 (xacc[i], swapped);
-            }
-
-            data_key = veorq_u32(data_vec, key_vec);
-
-            /* Here's the magic. We use the quirkiness of vzip to shuffle data_key in place.
-             * shuffle: data_key[0, 1, 2, 3] = data_key[0, 2, 1, 3] */
-            __asm__("vzip.32 %e0, %f0" : "+w" (data_key));
-            /* xacc[i] += (uint64x2_t) data_key[0, 1] * (uint64x2_t) data_key[2, 3]; */
-            xacc[i] = vmlal_u32(xacc[i], vget_low_u32(data_key), vget_high_u32(data_key));
+  XXH_ASSERT((((size_t)acc) & 15) == 0);
+  {
+    XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t*)acc;
+    /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7.
+     */
+    uint32_t const* const xdata = (const uint32_t*)data;
+    uint32_t const* const xkey = (const uint32_t*)key;
+
+    size_t i;
+    for (i = 0; i < STRIPE_LEN / sizeof(uint64x2_t); i++) {
+#if !defined(__aarch64__) && !defined(__arm64__) && \
+    defined(__GNUC__) /* ARM32-specific hack */
+      /* vzip on ARMv7 Clang generates a lot of vmovs (technically vorrs)
+       * without this. vzip on 32-bit ARM NEON will overwrite the original
+       * register, and I think that Clang assumes I don't want to destroy it and
+       * tries to make a copy. This slows down the code a lot. aarch64 not only
+       * uses an entirely different syntax, but it requires three
+       * instructions...
+       *    ext    v1.16B, v0.16B, #8    // select high bits because aarch64
+       *                                  // can't address them directly
+       *    zip1   v3.2s, v0.2s, v1.2s   // first zip
+       *    zip2   v2.2s, v0.2s, v1.2s   // second zip
+       * ...to do what ARM does in one:
+       *    vzip.32 d0, d1               // Interleave high/low bits and overwrite. */
+
+      /* data_vec = xdata[i]; */
+      uint32x4_t const data_vec = vld1q_u32(xdata + (i * 4));
+      /* key_vec  = xkey[i];  */
+      uint32x4_t const key_vec = vld1q_u32(xkey + (i * 4));
+      /* data_key = data_vec ^ key_vec; */
+      uint32x4_t data_key;
+
+      if (accWidth == XXH3_acc_64bits) {
+        /* Add first to prevent register swaps */
+        /* xacc[i] += data_vec; */
+        xacc[i] = vaddq_u64(xacc[i], vreinterpretq_u64_u32(data_vec));
+      } else { /* XXH3_acc_128bits */
+        /* xacc[i] += swap(data_vec); */
+        /* can probably be optimized better */
+        uint64x2_t const data64 = vreinterpretq_u64_u32(data_vec);
+        uint64x2_t const swapped = vextq_u64(data64, data64, 1);
+        xacc[i] = vaddq_u64(xacc[i], swapped);
+      }
+
+      data_key = veorq_u32(data_vec, key_vec);
+
+      /* Here's the magic. We use the quirkiness of vzip to shuffle data_key in
+       * place. shuffle: data_key[0, 1, 2, 3] = data_key[0, 2, 1, 3] */
+      __asm__("vzip.32 %e0, %f0" : "+w"(data_key));
+      /* xacc[i] += (uint64x2_t) data_key[0, 1] * (uint64x2_t) data_key[2, 3];
+       */
+      xacc[i] = vmlal_u32(xacc[i], vget_low_u32(data_key), vget_high_u32(data_key));
 
 #else
-            /* On aarch64, vshrn/vmovn seems to be equivalent to, if not faster than, the vzip method. */
-
-            /* data_vec = xdata[i]; */
-            uint32x4_t const data_vec    = vld1q_u32(xdata + (i * 4));
-            /* key_vec  = xkey[i];  */
-            uint32x4_t const key_vec     = vld1q_u32(xkey  + (i * 4));
-            /* data_key = data_vec ^ key_vec; */
-            uint32x4_t const data_key    = veorq_u32(data_vec, key_vec);
-            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); */
-            uint32x2_t const data_key_lo = vmovn_u64  (vreinterpretq_u64_u32(data_key));
-            /* data_key_hi = (uint32x2_t) (data_key >> 32); */
-            uint32x2_t const data_key_hi = vshrn_n_u64 (vreinterpretq_u64_u32(data_key), 32);
-            if (accWidth == XXH3_acc_64bits) {
-                /* xacc[i] += data_vec; */
-                xacc[i] = vaddq_u64 (xacc[i], vreinterpretq_u64_u32(data_vec));
-            } else {  /* XXH3_acc_128bits */
-                /* xacc[i] += swap(data_vec); */
-                uint64x2_t const data64 = vreinterpretq_u64_u32(data_vec);
-                uint64x2_t const swapped= vextq_u64(data64, data64, 1);
-                xacc[i] = vaddq_u64 (xacc[i], swapped);
-            }
-            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
-            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
+      /* On aarch64, vshrn/vmovn seems to be equivalent to, if not faster than,
+       * the vzip method. */
+
+      /* data_vec = xdata[i]; */
+      uint32x4_t const data_vec = vld1q_u32(xdata + (i * 4));
+      /* key_vec  = xkey[i];  */
+      uint32x4_t const key_vec = vld1q_u32(xkey + (i * 4));
+      /* data_key = data_vec ^ key_vec; */
+      uint32x4_t const data_key = veorq_u32(data_vec, key_vec);
+      /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); */
+      uint32x2_t const data_key_lo = vmovn_u64(vreinterpretq_u64_u32(data_key));
+      /* data_key_hi = (uint32x2_t) (data_key >> 32); */
+      uint32x2_t const data_key_hi = vshrn_n_u64(vreinterpretq_u64_u32(data_key), 32);
+      if (accWidth == XXH3_acc_64bits) {
+        /* xacc[i] += data_vec; */
+        xacc[i] = vaddq_u64(xacc[i], vreinterpretq_u64_u32(data_vec));
+      } else { /* XXH3_acc_128bits */
+        /* xacc[i] += swap(data_vec); */
+        uint64x2_t const data64 = vreinterpretq_u64_u32(data_vec);
+        uint64x2_t const swapped = vextq_u64(data64, data64, 1);
+        xacc[i] = vaddq_u64(xacc[i], swapped);
+      }
+      /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
+      xacc[i] = vmlal_u32(xacc[i], data_key_lo, data_key_hi);
 
 #endif
-        }
     }
-
-#elif (XXH_VECTOR == XXH_VSX) && 0   /* <=========================== DISABLED : MUST BE VALIDATED */
-    /* note : vsx code path currently not tested in CI (limitation of cross-compiler and/or emulator)
-     *        for vsx code path to be shipped and supported, it is critical to create a CI test for it */
-          U64x2* const xacc =        (U64x2*) acc;    /* presumed aligned */
-    U64x2 const* const xdata = (U64x2 const*) data;   /* no alignment restriction */
-    U64x2 const* const xkey  = (U64x2 const*) key;    /* no alignment restriction */
-    U64x2 const v32 = { 32,  32 };
-
-    size_t i;
-    for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) {
-        /* data_vec = xdata[i]; */
-        /* key_vec = xkey[i]; */
+  }
+
+#elif (XXH_VECTOR == XXH_VSX) && \
+    0 /* <=========================== DISABLED : MUST BE VALIDATED */
+  /* note : vsx code path currently not tested in CI (limitation of
+   * cross-compiler and/or emulator). For vsx code path to be shipped and
+   * supported, it is critical to create a CI test for it. */
+  U64x2* const xacc = (U64x2*)acc;               /* presumed aligned */
+  U64x2 const* const xdata = (U64x2 const*)data; /* no alignment restriction */
+  U64x2 const* const xkey = (U64x2 const*)key;   /* no alignment restriction */
+  U64x2 const v32 = {32, 32};
+
+  size_t i;
+  for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) {
+    /* data_vec = xdata[i]; */
+    /* key_vec = xkey[i]; */
 #ifdef __BIG_ENDIAN__
-        /* byteswap */
-        U64x2 const data_vec = vec_revb(vec_vsx_ld(0, xdata + i));  /* note : vec_revb is power9+ */
-        U64x2 const key_vec = vec_revb(vec_vsx_ld(0, xkey + i));    /* note : vec_revb is power9+ */
+    /* byteswap */
+    U64x2 const data_vec =
+        vec_revb(vec_vsx_ld(0, xdata + i)); /* note : vec_revb is power9+ */
+    U64x2 const key_vec =
+        vec_revb(vec_vsx_ld(0, xkey + i)); /* note : vec_revb is power9+ */
 #else
-        U64x2 const data_vec = vec_vsx_ld(0, xdata + i);
-        U64x2 const key_vec = vec_vsx_ld(0, xkey + i);
+    U64x2 const data_vec = vec_vsx_ld(0, xdata + i);
+    U64x2 const key_vec = vec_vsx_ld(0, xkey + i);
 #endif
-        U64x2 const data_key = data_vec ^ key_vec;
-        /* shuffled = (data_key << 32) | (data_key >> 32); */
-        U32x4 const shuffled = (U32x4)vec_rl(data_key, v32);
-        /* product = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)shuffled & 0xFFFFFFFF); */
-        U64x2 const product = XXH_vsxMultOdd((U32x4)data_key, shuffled);
-
-        xacc[i] += product;
-
-        if (accWidth == XXH3_acc_64bits) {
-            xacc[i] += data_vec;
-        } else {  /* XXH3_acc_128bits */
-            U64x2 const data_swapped = vec_permi(data_vec, data_vec, 2);   /* <===== untested !!! */
-            xacc[i] += data_swapped;
-        }
+    U64x2 const data_key = data_vec ^ key_vec;
+    /* shuffled = (data_key << 32) | (data_key >> 32); */
+    U32x4 const shuffled = (U32x4)vec_rl(data_key, v32);
+    /* product = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)shuffled &
+     * 0xFFFFFFFF); */
+    U64x2 const product = XXH_vsxMultOdd((U32x4)data_key, shuffled);
+
+    xacc[i] += product;
+
+    if (accWidth == XXH3_acc_64bits) {
+      xacc[i] += data_vec;
+    } else { /* XXH3_acc_128bits */
+      U64x2 const data_swapped =
+          vec_permi(data_vec, data_vec, 2); /* <===== untested !!! */
+      xacc[i] += data_swapped;
     }
-
-#else   /* scalar variant of Accumulator - universal */
-
-    XXH_ALIGN(XXH_ACC_ALIGN) U64* const xacc = (U64*) acc;    /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */
-    const char* const xdata = (const char*) data;  /* no alignment restriction */
-    const char* const xkey  = (const char*) key;   /* no alignment restriction */
-    size_t i;
-    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
-    for (i=0; i < ACC_NB; i+=2) {
-        U64 const in1 = XXH_readLE64(xdata + 8*i);
-        U64 const in2 = XXH_readLE64(xdata + 8*(i+1));
-        U64 const key1  = XXH_readLE64(xkey + 8*i);
-        U64 const key2  = XXH_readLE64(xkey + 8*(i+1));
-        U64 const data_key1 = key1 ^ in1;
-        U64 const data_key2 = key2 ^ in2;
-        xacc[i]   += XXH_mult32to64(data_key1 & 0xFFFFFFFF, data_key1 >> 32);
-        xacc[i+1] += XXH_mult32to64(data_key2 & 0xFFFFFFFF, data_key2 >> 32);
-        if (accWidth == XXH3_acc_128bits) {
-            xacc[i]   += in2;
-            xacc[i+1] += in1;
-        } else {  /* XXH3_acc_64bits */
-            xacc[i]   += in1;
-            xacc[i+1] += in2;
-        }
+  }
+
+#else /* scalar variant of Accumulator - universal */
+
+  XXH_ALIGN(XXH_ACC_ALIGN)
+  U64* const xacc = (U64*)acc;                 /* presumed aligned on 32-bytes boundaries,
+                                                  little hint for the auto-vectorizer */
+  const char* const xdata = (const char*)data; /* no alignment restriction */
+  const char* const xkey = (const char*)key;   /* no alignment restriction */
+  size_t i;
+  XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN - 1)) == 0);
+  for (i = 0; i < ACC_NB; i += 2) {
+    U64 const in1 = XXH_readLE64(xdata + 8 * i);
+    U64 const in2 = XXH_readLE64(xdata + 8 * (i + 1));
+    U64 const key1 = XXH_readLE64(xkey + 8 * i);
+    U64 const key2 = XXH_readLE64(xkey + 8 * (i + 1));
+    U64 const data_key1 = key1 ^ in1;
+    U64 const data_key2 = key2 ^ in2;
+    xacc[i] += XXH_mult32to64(data_key1 & 0xFFFFFFFF, data_key1 >> 32);
+    xacc[i + 1] += XXH_mult32to64(data_key2 & 0xFFFFFFFF, data_key2 >> 32);
+    if (accWidth == XXH3_acc_128bits) {
+      xacc[i] += in2;
+      xacc[i + 1] += in1;
+    } else { /* XXH3_acc_64bits */
+      xacc[i] += in1;
+      xacc[i + 1] += in2;
     }
+  }
 #endif
 }
 
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc(void* XXH_RESTRICT acc, const void* XXH_RESTRICT key)
-{
+XXH_FORCE_INLINE void XXH3_scrambleAcc(void* XXH_RESTRICT acc,
+                                       const void* XXH_RESTRICT key) {
 #if (XXH_VECTOR == XXH_AVX2)
 
-    XXH_ASSERT((((size_t)acc) & 31) == 0);
-    {   XXH_ALIGN(32) __m256i* const xacc = (__m256i*) acc;
-        const         __m256i* const xkey = (const __m256i *) key;   /* not really aligned, just for ptr arithmetic, and because _mm256_loadu_si256() requires this argument type */
-        const __m256i prime32 = _mm256_set1_epi32((int)PRIME32_1);
+  XXH_ASSERT((((size_t)acc) & 31) == 0);
+  {
+    XXH_ALIGN(32) __m256i* const xacc = (__m256i*)acc;
+    const __m256i* const xkey =
+        (const __m256i*)key; /* not really aligned, just for ptr arithmetic, and because
+                                _mm256_loadu_si256() requires this argument type */
+    const __m256i prime32 = _mm256_set1_epi32((int)PRIME32_1);
 
-        size_t i;
-        for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) {
-            __m256i data = xacc[i];
-            __m256i const shifted = _mm256_srli_epi64(data, 47);
-            data = _mm256_xor_si256(data, shifted);
+    size_t i;
+    for (i = 0; i < STRIPE_LEN / sizeof(__m256i); i++) {
+      __m256i data = xacc[i];
+      __m256i const shifted = _mm256_srli_epi64(data, 47);
+      data = _mm256_xor_si256(data, shifted);
 
-            {   __m256i const k   = _mm256_loadu_si256 (xkey+i);
-                __m256i const dk  = _mm256_xor_si256   (data, k);
+      {
+        __m256i const k = _mm256_loadu_si256(xkey + i);
+        __m256i const dk = _mm256_xor_si256(data, k);
 
-                __m256i const dk1 = _mm256_mul_epu32 (dk, prime32);
+        __m256i const dk1 = _mm256_mul_epu32(dk, prime32);
 
-                __m256i const d2  = _mm256_shuffle_epi32 (dk, 0x31);
-                __m256i const dk2 = _mm256_mul_epu32 (d2, prime32);
-                __m256i const dk2h= _mm256_slli_epi64 (dk2, 32);
+        __m256i const d2 = _mm256_shuffle_epi32(dk, 0x31);
+        __m256i const dk2 = _mm256_mul_epu32(d2, prime32);
+        __m256i const dk2h = _mm256_slli_epi64(dk2, 32);
 
-                xacc[i] = _mm256_add_epi64(dk1, dk2h);
-        }   }
+        xacc[i] = _mm256_add_epi64(dk1, dk2h);
+      }
     }
+  }
 
 #elif (XXH_VECTOR == XXH_SSE2)
 
-    {   XXH_ALIGN(16) __m128i* const xacc = (__m128i*) acc;
-        const         __m128i* const xkey = (const __m128i *) key;   /* not really aligned, just for ptr arithmetic */
-        const __m128i prime32 = _mm_set1_epi32((int)PRIME32_1);
+  {
+    XXH_ALIGN(16) __m128i* const xacc = (__m128i*)acc;
+    const __m128i* const xkey =
+        (const __m128i*)key; /* not really aligned, just for ptr arithmetic */
+    const __m128i prime32 = _mm_set1_epi32((int)PRIME32_1);
 
-        size_t i;
-        for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) {
-            __m128i data = xacc[i];
-            __m128i const shifted = _mm_srli_epi64(data, 47);
-            data = _mm_xor_si128(data, shifted);
+    size_t i;
+    for (i = 0; i < STRIPE_LEN / sizeof(__m128i); i++) {
+      __m128i data = xacc[i];
+      __m128i const shifted = _mm_srli_epi64(data, 47);
+      data = _mm_xor_si128(data, shifted);
 
-            {   __m128i const k   = _mm_loadu_si128 (xkey+i);
-                __m128i const dk  = _mm_xor_si128   (data,k);
+      {
+        __m128i const k = _mm_loadu_si128(xkey + i);
+        __m128i const dk = _mm_xor_si128(data, k);
 
-                __m128i const dk1 = _mm_mul_epu32 (dk, prime32);
+        __m128i const dk1 = _mm_mul_epu32(dk, prime32);
 
-                __m128i const d2  = _mm_shuffle_epi32 (dk, 0x31);
-                __m128i const dk2 = _mm_mul_epu32 (d2, prime32);
-                __m128i const dk2h= _mm_slli_epi64(dk2, 32);
+        __m128i const d2 = _mm_shuffle_epi32(dk, 0x31);
+        __m128i const dk2 = _mm_mul_epu32(d2, prime32);
+        __m128i const dk2h = _mm_slli_epi64(dk2, 32);
 
-                xacc[i] = _mm_add_epi64(dk1, dk2h);
-        }   }
+        xacc[i] = _mm_add_epi64(dk1, dk2h);
+      }
     }
+  }
 
 #elif (XXH_VECTOR == XXH_NEON)
 
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-
-    {   uint64x2_t* const xacc =     (uint64x2_t*) acc;
-        uint32_t const* const xkey = (uint32_t const*) key;
-        uint32x2_t const prime     = vdup_n_u32 (PRIME32_1);
-
-        size_t i;
-        for (i=0; i < STRIPE_LEN/sizeof(uint64x2_t); i++) {
-            /* data_vec = xacc[i] ^ (xacc[i] >> 47); */
-            uint64x2_t const   acc_vec  = xacc[i];
-            uint64x2_t const   shifted  = vshrq_n_u64 (acc_vec, 47);
-            uint64x2_t const   data_vec = veorq_u64   (acc_vec, shifted);
-
-            /* key_vec  = xkey[i]; */
-            uint32x4_t const   key_vec  = vld1q_u32   (xkey + (i * 4));
-            /* data_key = data_vec ^ key_vec; */
-            uint32x4_t const   data_key = veorq_u32   (vreinterpretq_u32_u64(data_vec), key_vec);
-            /* shuffled = { data_key[0, 2], data_key[1, 3] }; */
-            uint32x2x2_t const shuffled = vzip_u32    (vget_low_u32(data_key), vget_high_u32(data_key));
+  XXH_ASSERT((((size_t)acc) & 15) == 0);
 
-            /* data_key *= PRIME32_1 */
+  {
+    uint64x2_t* const xacc = (uint64x2_t*)acc;
+    uint32_t const* const xkey = (uint32_t const*)key;
+    uint32x2_t const prime = vdup_n_u32(PRIME32_1);
 
-            /* prod_hi = (data_key >> 32) * PRIME32_1; */
-            uint64x2_t const   prod_hi = vmull_u32    (shuffled.val[1], prime);
-            /* xacc[i] = prod_hi << 32; */
-            xacc[i] = vshlq_n_u64(prod_hi, 32);
-            /* xacc[i] += (prod_hi & 0xFFFFFFFF) * PRIME32_1; */
-            xacc[i] = vmlal_u32(xacc[i], shuffled.val[0], prime);
-    }   }
+    size_t i;
+    for (i = 0; i < STRIPE_LEN / sizeof(uint64x2_t); i++) {
+      /* data_vec = xacc[i] ^ (xacc[i] >> 47); */
+      uint64x2_t const acc_vec = xacc[i];
+      uint64x2_t const shifted = vshrq_n_u64(acc_vec, 47);
+      uint64x2_t const data_vec = veorq_u64(acc_vec, shifted);
+
+      /* key_vec  = xkey[i]; */
+      uint32x4_t const key_vec = vld1q_u32(xkey + (i * 4));
+      /* data_key = data_vec ^ key_vec; */
+      uint32x4_t const data_key = veorq_u32(vreinterpretq_u32_u64(data_vec), key_vec);
+      /* shuffled = { data_key[0, 2], data_key[1, 3] }; */
+      uint32x2x2_t const shuffled =
+          vzip_u32(vget_low_u32(data_key), vget_high_u32(data_key));
+
+      /* data_key *= PRIME32_1 */
+
+      /* prod_hi = (data_key >> 32) * PRIME32_1; */
+      uint64x2_t const prod_hi = vmull_u32(shuffled.val[1], prime);
+      /* xacc[i] = prod_hi << 32; */
+      xacc[i] = vshlq_n_u64(prod_hi, 32);
+      /* xacc[i] += (data_key & 0xFFFFFFFF) * PRIME32_1; */
+      xacc[i] = vmlal_u32(xacc[i], shuffled.val[0], prime);
+    }
+  }
 
 #elif (XXH_VECTOR == XXH_VSX)
 
-          U64x2* const xacc =       (U64x2*) acc;
-    const U64x2* const xkey = (const U64x2*) key;
-    /* constants */
-    U64x2 const v32  = { 32, 32 };
-    U64x2 const v47 = { 47, 47 };
-    U32x4 const prime = { PRIME32_1, PRIME32_1, PRIME32_1, PRIME32_1 };
-    size_t i;
-
-    for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) {
-        U64x2 const acc_vec  = xacc[i];
-        U64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
-        /* key_vec = xkey[i]; */
+  U64x2* const xacc = (U64x2*)acc;
+  const U64x2* const xkey = (const U64x2*)key;
+  /* constants */
+  U64x2 const v32 = {32, 32};
+  U64x2 const v47 = {47, 47};
+  U32x4 const prime = {PRIME32_1, PRIME32_1, PRIME32_1, PRIME32_1};
+  size_t i;
+
+  for (i = 0; i < STRIPE_LEN / sizeof(U64x2); i++) {
+    U64x2 const acc_vec = xacc[i];
+    U64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
+    /* key_vec = xkey[i]; */
 #ifdef __BIG_ENDIAN__
-        /* swap 32-bit words */
-        U64x2 const key_vec  = vec_rl(vec_vsx_ld(0, xkey + i), v32);
+    /* swap 32-bit words */
+    U64x2 const key_vec = vec_rl(vec_vsx_ld(0, xkey + i), v32);
 #else
-        U64x2 const key_vec  = vec_vsx_ld(0, xkey + i);
+    U64x2 const key_vec = vec_vsx_ld(0, xkey + i);
 #endif
-        U64x2 const data_key = data_vec ^ key_vec;
+    U64x2 const data_key = data_vec ^ key_vec;
 
-        /* data_key *= PRIME32_1 */
+    /* data_key *= PRIME32_1 */
 
-        /* prod_lo = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)prime & 0xFFFFFFFF);  */
-        U64x2 const prod_lo  = XXH_vsxMultOdd((U32x4)data_key, prime);
-        /* prod_hi = ((U64x2)data_key >> 32) * ((U64x2)prime >> 32);  */
-        U64x2 const prod_hi  = XXH_vsxMultEven((U32x4)data_key, prime);
-        xacc[i] = prod_lo + (prod_hi << v32);
-    }
-
-#else   /* scalar variant of Scrambler - universal */
-
-    XXH_ALIGN(XXH_ACC_ALIGN) U64* const xacc = (U64*) acc;   /* presumed aligned on 32-bytes boundaries, little hint for the auto-vectorizer */
-    const char* const xkey = (const char*) key;   /* no alignment restriction */
-    int i;
-    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
-
-    for (i=0; i < (int)ACC_NB; i++) {
-        U64 const key64 = XXH_readLE64(xkey + 8*i);
-        U64 acc64 = xacc[i];
-        acc64 ^= acc64 >> 47;
-        acc64 ^= key64;
-        acc64 *= PRIME32_1;
-        xacc[i] = acc64;
-    }
+    /* prod_lo = ((U64x2)data_key & 0xFFFFFFFF) * ((U64x2)prime & 0xFFFFFFFF);
+     */
+    U64x2 const prod_lo = XXH_vsxMultOdd((U32x4)data_key, prime);
+    /* prod_hi = ((U64x2)data_key >> 32) * ((U64x2)prime >> 32);  */
+    U64x2 const prod_hi = XXH_vsxMultEven((U32x4)data_key, prime);
+    xacc[i] = prod_lo + (prod_hi << v32);
+  }
+
+#else /* scalar variant of Scrambler - universal */
+
+  XXH_ALIGN(XXH_ACC_ALIGN)
+  U64* const xacc = (U64*)acc;               /* presumed aligned on 32-bytes boundaries,
+                                                little hint for the auto-vectorizer */
+  const char* const xkey = (const char*)key; /* no alignment restriction */
+  int i;
+  XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN - 1)) == 0);
+
+  for (i = 0; i < (int)ACC_NB; i++) {
+    U64 const key64 = XXH_readLE64(xkey + 8 * i);
+    U64 acc64 = xacc[i];
+    acc64 ^= acc64 >> 47;
+    acc64 ^= key64;
+    acc64 *= PRIME32_1;
+    xacc[i] = acc64;
+  }
 
 #endif
 }
 
 /* assumption : nbStripes will not overflow secret size */
-XXH_FORCE_INLINE void
-XXH3_accumulate(       U64* XXH_RESTRICT acc,
-                const void* XXH_RESTRICT data,
-                const void* XXH_RESTRICT secret,
-                      size_t nbStripes,
-                      XXH3_accWidth_e accWidth)
-{
-    size_t n;
-    /* Clang doesn't unroll this loop without the pragma. Unrolling can be up to 1.4x faster.
-     * The unroll statement seems detrimental for WASM (@aras-p) and ARM though.
-     */
-#if defined(__clang__) && !defined(__OPTIMIZE_SIZE__) && !defined(__ARM_ARCH) && !defined(__EMSCRIPTEN__)
-#  pragma clang loop unroll(enable)
+XXH_FORCE_INLINE void XXH3_accumulate(U64* XXH_RESTRICT acc,
+                                      const void* XXH_RESTRICT data,
+                                      const void* XXH_RESTRICT secret, size_t nbStripes,
+                                      XXH3_accWidth_e accWidth) {
+  size_t n;
+  /* Clang doesn't unroll this loop without the pragma. Unrolling can be up
+   * to 1.4x faster. The unroll statement seems detrimental for WASM (@aras-p)
+   * and ARM though.
+   */
+#if defined(__clang__) && !defined(__OPTIMIZE_SIZE__) && !defined(__ARM_ARCH) && \
+    !defined(__EMSCRIPTEN__)
+#pragma clang loop unroll(enable)
 #endif
 
-    for (n = 0; n < nbStripes; n++ ) {
-        XXH3_accumulate_512(acc,
-               (const char*)data   + n*STRIPE_LEN,
-               (const char*)secret + n*XXH_SECRET_CONSUME_RATE,
-                            accWidth);
-    }
+  for (n = 0; n < nbStripes; n++) {
+    XXH3_accumulate_512(acc, (const char*)data + n * STRIPE_LEN,
+                        (const char*)secret + n * XXH_SECRET_CONSUME_RATE, accWidth);
+  }
 }
 
 /* note : clang auto-vectorizes well in SS2 mode _if_ this function is `static`,
@@ -802,123 +834,125 @@ XXH3_accumulate(       U64* XXH_RESTRICT acc,
  *        However, it auto-vectorizes better AVX2 if it is `FORCE_INLINE`
  *        Pretty much every other modes and compilers prefer `FORCE_INLINE`.
  */
-#if defined(__clang__) && (XXH_VECTOR==0) && !defined(__AVX2__)
+#if defined(__clang__) && (XXH_VECTOR == 0) && !defined(__AVX2__)
 static void
 #else
 XXH_FORCE_INLINE void
 #endif
-XXH3_hashLong_internal_loop( U64* XXH_RESTRICT acc,
-                      const void* XXH_RESTRICT data, size_t len,
-                      const void* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_accWidth_e accWidth)
-{
-    size_t const nb_rounds = (secretSize - STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
-    size_t const block_len = STRIPE_LEN * nb_rounds;
-    size_t const nb_blocks = len / block_len;
-
-    size_t n;
-
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-
-    for (n = 0; n < nb_blocks; n++) {
-        XXH3_accumulate(acc, (const char*)data + n*block_len, secret, nb_rounds, accWidth);
-        XXH3_scrambleAcc(acc, (const char*)secret + secretSize - STRIPE_LEN);
+XXH3_hashLong_internal_loop(U64* XXH_RESTRICT acc, const void* XXH_RESTRICT data,
+                            size_t len, const void* XXH_RESTRICT secret,
+                            size_t secretSize, XXH3_accWidth_e accWidth) {
+  size_t const nb_rounds = (secretSize - STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
+  size_t const block_len = STRIPE_LEN * nb_rounds;
+  size_t const nb_blocks = len / block_len;
+
+  size_t n;
+
+  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+
+  for (n = 0; n < nb_blocks; n++) {
+    XXH3_accumulate(acc, (const char*)data + n * block_len, secret, nb_rounds, accWidth);
+    XXH3_scrambleAcc(acc, (const char*)secret + secretSize - STRIPE_LEN);
+  }
+
+  /* last partial block */
+  XXH_ASSERT(len > STRIPE_LEN);
+  {
+    size_t const nbStripes = (len - (block_len * nb_blocks)) / STRIPE_LEN;
+    XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
+    XXH3_accumulate(acc, (const char*)data + nb_blocks * block_len, secret, nbStripes,
+                    accWidth);
+
+    /* last stripe */
+    if (len & (STRIPE_LEN - 1)) {
+      const void* const p = (const char*)data + len - STRIPE_LEN;
+#define XXH_SECRET_LASTACC_START \
+  7 /* do not align on 8, so that secret is different from scrambler */
+      XXH3_accumulate_512(
+          acc, p,
+          (const char*)secret + secretSize - STRIPE_LEN - XXH_SECRET_LASTACC_START,
+          accWidth);
     }
-
-    /* last partial block */
-    XXH_ASSERT(len > STRIPE_LEN);
-    {   size_t const nbStripes = (len - (block_len * nb_blocks)) / STRIPE_LEN;
-        XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
-        XXH3_accumulate(acc, (const char*)data + nb_blocks*block_len, secret, nbStripes, accWidth);
-
-        /* last stripe */
-        if (len & (STRIPE_LEN - 1)) {
-            const void* const p = (const char*)data + len - STRIPE_LEN;
-#define XXH_SECRET_LASTACC_START 7  /* do not align on 8, so that secret is different from scrambler */
-            XXH3_accumulate_512(acc, p, (const char*)secret + secretSize - STRIPE_LEN - XXH_SECRET_LASTACC_START, accWidth);
-    }   }
+  }
 }
 
-XXH_FORCE_INLINE U64
-XXH3_mix2Accs(const U64* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    const U64* const key64 = (const U64*)secret;
-    return XXH3_mul128_fold64(
-               acc[0] ^ XXH_readLE64(key64),
-               acc[1] ^ XXH_readLE64(key64+1) );
+XXH_FORCE_INLINE U64 XXH3_mix2Accs(const U64* XXH_RESTRICT acc,
+                                   const void* XXH_RESTRICT secret) {
+  const U64* const key64 = (const U64*)secret;
+  return XXH3_mul128_fold64(acc[0] ^ XXH_readLE64(key64),
+                            acc[1] ^ XXH_readLE64(key64 + 1));
 }
 
-static XXH64_hash_t
-XXH3_mergeAccs(const U64* XXH_RESTRICT acc, const void* XXH_RESTRICT secret, U64 start)
-{
-    U64 result64 = start;
+static XXH64_hash_t XXH3_mergeAccs(const U64* XXH_RESTRICT acc,
+                                   const void* XXH_RESTRICT secret, U64 start) {
+  U64 result64 = start;
 
-    result64 += XXH3_mix2Accs(acc+0, (const char*)secret +  0);
-    result64 += XXH3_mix2Accs(acc+2, (const char*)secret + 16);
-    result64 += XXH3_mix2Accs(acc+4, (const char*)secret + 32);
-    result64 += XXH3_mix2Accs(acc+6, (const char*)secret + 48);
+  result64 += XXH3_mix2Accs(acc + 0, (const char*)secret + 0);
+  result64 += XXH3_mix2Accs(acc + 2, (const char*)secret + 16);
+  result64 += XXH3_mix2Accs(acc + 4, (const char*)secret + 32);
+  result64 += XXH3_mix2Accs(acc + 6, (const char*)secret + 48);
 
-    return XXH3_avalanche(result64);
+  return XXH3_avalanche(result64);
 }
 
-#define XXH3_INIT_ACC { PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, \
-                        PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1 };
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_hashLong_internal(const void* XXH_RESTRICT data, size_t len,
-                       const void* XXH_RESTRICT secret, size_t secretSize)
-{
-    XXH_ALIGN(XXH_ACC_ALIGN) U64 acc[ACC_NB] = XXH3_INIT_ACC;
-
-    XXH3_hashLong_internal_loop(acc, data, len, secret, secretSize, XXH3_acc_64bits);
-
-    /* converge into final hash */
-    XXH_STATIC_ASSERT(sizeof(acc) == 64);
-#define XXH_SECRET_MERGEACCS_START 11  /* do not align on 8, so that secret is different from accumulator */
-    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-    return XXH3_mergeAccs(acc, (const char*)secret + XXH_SECRET_MERGEACCS_START, (U64)len * PRIME64_1);
+#define XXH3_INIT_ACC                          \
+  {PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, \
+   PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1};
+
+XXH_FORCE_INLINE XXH64_hash_t XXH3_hashLong_internal(const void* XXH_RESTRICT data,
+                                                     size_t len,
+                                                     const void* XXH_RESTRICT secret,
+                                                     size_t secretSize) {
+  XXH_ALIGN(XXH_ACC_ALIGN) U64 acc[ACC_NB] = XXH3_INIT_ACC;
+
+  XXH3_hashLong_internal_loop(acc, data, len, secret, secretSize, XXH3_acc_64bits);
+
+  /* converge into final hash */
+  XXH_STATIC_ASSERT(sizeof(acc) == 64);
+#define XXH_SECRET_MERGEACCS_START \
+  11 /* do not align on 8, so that secret is different from accumulator */
+  XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+  return XXH3_mergeAccs(acc, (const char*)secret + XXH_SECRET_MERGEACCS_START,
+                        (U64)len * PRIME64_1);
 }
 
-
-XXH_NO_INLINE XXH64_hash_t    /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */
-XXH3_hashLong_64b_defaultSecret(const void* XXH_RESTRICT data, size_t len)
-{
-    return XXH3_hashLong_internal(data, len, kSecret, sizeof(kSecret));
+/* Long-input path with the built-in kSecret. Deliberately NOT inlined: XXH3_hashLong
+ * is measurably faster out of line (reason unclear, uop cache maybe?). */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_defaultSecret(const void* XXH_RESTRICT data, size_t len) {
+  return XXH3_hashLong_internal(data, len, kSecret, sizeof(kSecret));
 }
 
-XXH_NO_INLINE XXH64_hash_t    /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */
+/* Long-input path with a caller-supplied secret. Deliberately NOT inlined: XXH3_hashLong
+ * is measurably faster out of line (reason unclear, uop cache maybe?). */
+XXH_NO_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT data, size_t len,
-                             const void* XXH_RESTRICT secret, size_t secretSize)
-{
-    return XXH3_hashLong_internal(data, len, secret, secretSize);
+                             const void* XXH_RESTRICT secret, size_t secretSize) {
+  return XXH3_hashLong_internal(data, len, secret, secretSize);
 }
 
-
-XXH_FORCE_INLINE void XXH_writeLE64(void* dst, U64 v64)
-{
-    if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
-    memcpy(dst, &v64, sizeof(v64));
+XXH_FORCE_INLINE void XXH_writeLE64(void* dst, U64 v64) {
+  if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
+  memcpy(dst, &v64, sizeof(v64));
 }
 
 /* XXH3_initKeySeed() :
  * destination `customSecret` is presumed allocated and same size as `kSecret`.
  */
-XXH_FORCE_INLINE void XXH3_initKeySeed(void* customSecret, U64 seed64)
-{
-          char* const dst = (char*)customSecret;
-    const char* const src = (const char*)kSecret;
-    int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
-    int i;
-
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
-
-    for (i=0; i < nbRounds; i++) {
-        XXH_writeLE64(dst + 16*i,     XXH_readLE64(src + 16*i)     + seed64);
-        XXH_writeLE64(dst + 16*i + 8, XXH_readLE64(src + 16*i + 8) - seed64);
-    }
+XXH_FORCE_INLINE void XXH3_initKeySeed(void* customSecret, U64 seed64) {
+  char* const dst = (char*)customSecret;
+  const char* const src = (const char*)kSecret;
+  int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
+  int i;
+
+  XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+
+  for (i = 0; i < nbRounds; i++) {
+    XXH_writeLE64(dst + 16 * i, XXH_readLE64(src + 16 * i) + seed64);
+    XXH_writeLE64(dst + 16 * i + 8, XXH_readLE64(src + 16 * i + 8) - seed64);
+  }
 }
 
-
 /* XXH3_hashLong_64b_withSeed() :
  * Generate a custom key,
  * based on alteration of default kSecret with the seed,
@@ -926,674 +960,687 @@ XXH_FORCE_INLINE void XXH3_initKeySeed(void* customSecret, U64 seed64)
  * This operation is decently fast but nonetheless costs a little bit of time.
  * Try to avoid it whenever possible (typically when seed==0).
  */
-XXH_NO_INLINE XXH64_hash_t    /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */
-XXH3_hashLong_64b_withSeed(const void* data, size_t len, XXH64_hash_t seed)
-{
-    XXH_ALIGN(8) char secret[XXH_SECRET_DEFAULT_SIZE];
-    if (seed==0) return XXH3_hashLong_64b_defaultSecret(data, len);
-    XXH3_initKeySeed(secret, seed);
-    return XXH3_hashLong_internal(data, len, secret, sizeof(secret));
+/* Deliberately NOT inlined: XXH3_hashLong is measurably faster out of line (reason
+ * unclear, uop cache maybe?). seed == 0 short-cuts to the default-secret variant. */
+XXH_NO_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSeed(const void* data, size_t len, XXH64_hash_t seed) {
+  XXH_ALIGN(8) char secret[XXH_SECRET_DEFAULT_SIZE];
+  if (seed == 0) return XXH3_hashLong_64b_defaultSecret(data, len);
+  XXH3_initKeySeed(secret, seed);
+  return XXH3_hashLong_internal(data, len, secret, sizeof(secret));
 }
 
-
 XXH_FORCE_INLINE U64 XXH3_mix16B(const void* XXH_RESTRICT data,
-                                 const void* XXH_RESTRICT key, U64 seed64)
-{
-    const U64* const key64 = (const U64*)key;
-    U64 const ll1 = XXH_readLE64(data);
-    U64 const ll2 = XXH_readLE64((const BYTE*)data+8);
-    return XXH3_mul128_fold64(
-               ll1 ^ (XXH_readLE64(key64)   + seed64),
-               ll2 ^ (XXH_readLE64(key64+1) - seed64) );
+                                 const void* XXH_RESTRICT key, U64 seed64) {
+  const U64* const key64 = (const U64*)key;
+  U64 const ll1 = XXH_readLE64(data);
+  U64 const ll2 = XXH_readLE64((const BYTE*)data + 8);
+  return XXH3_mul128_fold64(ll1 ^ (XXH_readLE64(key64) + seed64),
+                            ll2 ^ (XXH_readLE64(key64 + 1) - seed64));
 }
 
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_len_17to128_64b(const void* XXH_RESTRICT data, size_t len,
-                     const void* XXH_RESTRICT secret, size_t secretSize,
-                     XXH64_hash_t seed)
-{
-    const BYTE* const p = (const BYTE*)data;
-    const char* const key = (const char*)secret;
-
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(16 < len && len <= 128);
-
-    {   U64 acc = len * PRIME64_1;
-        if (len > 32) {
-            if (len > 64) {
-                if (len > 96) {
-                    acc += XXH3_mix16B(p+48, key+96, seed);
-                    acc += XXH3_mix16B(p+len-64, key+112, seed);
-                }
-                acc += XXH3_mix16B(p+32, key+64, seed);
-                acc += XXH3_mix16B(p+len-48, key+80, seed);
-            }
-            acc += XXH3_mix16B(p+16, key+32, seed);
-            acc += XXH3_mix16B(p+len-32, key+48, seed);
+XXH_FORCE_INLINE XXH64_hash_t XXH3_len_17to128_64b(const void* XXH_RESTRICT data,
+                                                   size_t len,
+                                                   const void* XXH_RESTRICT secret,
+                                                   size_t secretSize, XXH64_hash_t seed) {
+  const BYTE* const p = (const BYTE*)data;
+  const char* const key = (const char*)secret;
+
+  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+  (void)secretSize;
+  XXH_ASSERT(16 < len && len <= 128);
+
+  {
+    U64 acc = len * PRIME64_1;
+    if (len > 32) {
+      if (len > 64) {
+        if (len > 96) {
+          acc += XXH3_mix16B(p + 48, key + 96, seed);
+          acc += XXH3_mix16B(p + len - 64, key + 112, seed);
         }
-        acc += XXH3_mix16B(p+0, key+0, seed);
-        acc += XXH3_mix16B(p+len-16, key+16, seed);
-
-        return XXH3_avalanche(acc);
+        acc += XXH3_mix16B(p + 32, key + 64, seed);
+        acc += XXH3_mix16B(p + len - 48, key + 80, seed);
+      }
+      acc += XXH3_mix16B(p + 16, key + 32, seed);
+      acc += XXH3_mix16B(p + len - 32, key + 48, seed);
     }
+    acc += XXH3_mix16B(p + 0, key + 0, seed);
+    acc += XXH3_mix16B(p + len - 16, key + 16, seed);
+
+    return XXH3_avalanche(acc);
+  }
 }
 
 #define XXH3_MIDSIZE_MAX 240
 
-XXH_NO_INLINE XXH64_hash_t
-XXH3_len_129to240_64b(const void* XXH_RESTRICT data, size_t len,
-                      const void* XXH_RESTRICT secret, size_t secretSize,
-                      XXH64_hash_t seed)
-{
-    const BYTE* const p = (const BYTE*)data;
-    const char* const key = (const char*)secret;
-
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-
-    #define XXH3_MIDSIZE_STARTOFFSET 3
-    #define XXH3_MIDSIZE_LASTOFFSET  17
-
-    {   U64 acc = len * PRIME64_1;
-        int const nbRounds = (int)len / 16;
-        int i;
-        for (i=0; i<8; i++) {
-            acc += XXH3_mix16B(p+(16*i), key+(16*i), seed);
-        }
-        acc = XXH3_avalanche(acc);
-        XXH_ASSERT(nbRounds >= 8);
-        for (i=8 ; i < nbRounds; i++) {
-            acc += XXH3_mix16B(p+(16*i), key+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
-        }
-        /* last bytes */
-        acc += XXH3_mix16B(p + len - 16, key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
-        return XXH3_avalanche(acc);
+XXH_NO_INLINE XXH64_hash_t XXH3_len_129to240_64b(const void* XXH_RESTRICT data,
+                                                 size_t len,
+                                                 const void* XXH_RESTRICT secret,
+                                                 size_t secretSize, XXH64_hash_t seed) {
+  const BYTE* const p = (const BYTE*)data;
+  const char* const key = (const char*)secret;
+
+  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+  (void)secretSize;
+  XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+#define XXH3_MIDSIZE_STARTOFFSET 3
+#define XXH3_MIDSIZE_LASTOFFSET 17
+
+  {
+    U64 acc = len * PRIME64_1;
+    int const nbRounds = (int)len / 16;
+    int i;
+    for (i = 0; i < 8; i++) {
+      acc += XXH3_mix16B(p + (16 * i), key + (16 * i), seed);
     }
+    acc = XXH3_avalanche(acc);
+    XXH_ASSERT(nbRounds >= 8);
+    for (i = 8; i < nbRounds; i++) {
+      acc += XXH3_mix16B(p + (16 * i), key + (16 * (i - 8)) + XXH3_MIDSIZE_STARTOFFSET,
+                         seed);
+    }
+    /* last bytes */
+    acc += XXH3_mix16B(p + len - 16, key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET,
+                       seed);
+    return XXH3_avalanche(acc);
+  }
 }
 
 /* ===   Public entry point   === */
 
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len)
-{
-    if (len <= 16) return XXH3_len_0to16_64b(data, len, kSecret, 0);
-    if (len <= 128) return XXH3_len_17to128_64b(data, len, kSecret, sizeof(kSecret), 0);
-    if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b(data, len, kSecret, sizeof(kSecret), 0);
-    return XXH3_hashLong_64b_defaultSecret(data, len);
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len) {
+  if (len <= 16) return XXH3_len_0to16_64b(data, len, kSecret, 0);
+  if (len <= 128) return XXH3_len_17to128_64b(data, len, kSecret, sizeof(kSecret), 0);
+  if (len <= XXH3_MIDSIZE_MAX)
+    return XXH3_len_129to240_64b(data, len, kSecret, sizeof(kSecret), 0);
+  return XXH3_hashLong_64b_defaultSecret(data, len);
 }
 
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-    /* if an action must be taken should `secret` conditions not be respected,
-     * it should be done here.
-     * For now, it's a contract pre-condition.
-     * Adding a check and a branch here would cost performance at every hash */
-     if (len <= 16) return XXH3_len_0to16_64b(data, len, secret, 0);
-     if (len <= 128) return XXH3_len_17to128_64b(data, len, secret, secretSize, 0);
-     if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b(data, len, secret, secretSize, 0);
-     return XXH3_hashLong_64b_withSecret(data, len, secret, secretSize);
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len,
+                                                   const void* secret,
+                                                   size_t secretSize) {
+  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+  /* A large-enough `secret` is a contract pre-condition: it is only asserted
+   * above, not validated, because a check and a branch here would cost
+   * performance at every hash. Should a violation ever need handling, this is
+   * the place to do it. */
+  if (len <= 16) return XXH3_len_0to16_64b(data, len, secret, 0);
+  if (len <= 128) return XXH3_len_17to128_64b(data, len, secret, secretSize, 0);
+  if (len <= XXH3_MIDSIZE_MAX)
+    return XXH3_len_129to240_64b(data, len, secret, secretSize, 0);
+  return XXH3_hashLong_64b_withSecret(data, len, secret, secretSize);
 }
 
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed)
-{
-    if (len <= 16) return XXH3_len_0to16_64b(data, len, kSecret, seed);
-    if (len <= 128) return XXH3_len_17to128_64b(data, len, kSecret, sizeof(kSecret), seed);
-    if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_64b(data, len, kSecret, sizeof(kSecret), seed);
-    return XXH3_hashLong_64b_withSeed(data, len, seed);
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len,
+                                                 XXH64_hash_t seed) {
+  if (len <= 16) return XXH3_len_0to16_64b(data, len, kSecret, seed);
+  if (len <= 128) return XXH3_len_17to128_64b(data, len, kSecret, sizeof(kSecret), seed);
+  if (len <= XXH3_MIDSIZE_MAX)
+    return XXH3_len_129to240_64b(data, len, kSecret, sizeof(kSecret), seed);
+  return XXH3_hashLong_64b_withSeed(data, len, seed);
 }
 
 /* ===   XXH3 streaming   === */
 
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
-{
-    return (XXH3_state_t*)XXH_malloc(sizeof(XXH3_state_t));
+XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) {
+  return (XXH3_state_t*)XXH_malloc(sizeof(XXH3_state_t));
 }
 
-XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
-{
-    XXH_free(statePtr);
-    return XXH_OK;
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) {
+  XXH_free(statePtr);
+  return XXH_OK;
 }
 
-XXH_PUBLIC_API void
-XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
-{
-    memcpy(dst_state, src_state, sizeof(*dst_state));
+XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state,
+                                   const XXH3_state_t* src_state) {
+  memcpy(dst_state, src_state, sizeof(*dst_state));
 }
 
-static void
-XXH3_64bits_reset_internal(XXH3_state_t* statePtr,
-                           XXH64_hash_t seed,
-                           const void* secret, size_t secretSize)
-{
-    XXH_ASSERT(statePtr != NULL);
-    memset(statePtr, 0, sizeof(*statePtr));
-    statePtr->acc[0] = PRIME32_3;
-    statePtr->acc[1] = PRIME64_1;
-    statePtr->acc[2] = PRIME64_2;
-    statePtr->acc[3] = PRIME64_3;
-    statePtr->acc[4] = PRIME64_4;
-    statePtr->acc[5] = PRIME32_2;
-    statePtr->acc[6] = PRIME64_5;
-    statePtr->acc[7] = PRIME32_1;
-    statePtr->seed = seed;
-    XXH_ASSERT(secret != NULL);
-    statePtr->secret = secret;
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-    statePtr->secretLimit = (XXH32_hash_t)(secretSize - STRIPE_LEN);
-    statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
+static void XXH3_64bits_reset_internal(XXH3_state_t* statePtr, XXH64_hash_t seed,
+                                       const void* secret, size_t secretSize) {
+  XXH_ASSERT(statePtr != NULL);
+  memset(statePtr, 0, sizeof(*statePtr));
+  statePtr->acc[0] = PRIME32_3;
+  statePtr->acc[1] = PRIME64_1;
+  statePtr->acc[2] = PRIME64_2;
+  statePtr->acc[3] = PRIME64_3;
+  statePtr->acc[4] = PRIME64_4;
+  statePtr->acc[5] = PRIME32_2;
+  statePtr->acc[6] = PRIME64_5;
+  statePtr->acc[7] = PRIME32_1;
+  statePtr->seed = seed;
+  XXH_ASSERT(secret != NULL);
+  statePtr->secret = secret;
+  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+  statePtr->secretLimit = (XXH32_hash_t)(secretSize - STRIPE_LEN);
+  statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
 }
 
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset(XXH3_state_t* statePtr)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_64bits_reset_internal(statePtr, 0, kSecret, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr) {
+  if (statePtr == NULL) return XXH_ERROR;
+  XXH3_64bits_reset_internal(statePtr, 0, kSecret, XXH_SECRET_DEFAULT_SIZE);
+  return XXH_OK;
 }
 
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_64bits_reset_internal(statePtr, 0, secret, secretSize);
-    if (secret == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-    return XXH_OK;
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr,
+                                                          const void* secret,
+                                                          size_t secretSize) {
+  /* Reject bad arguments first: the reset below computes secretSize - STRIPE_LEN. */
+  if (statePtr == NULL || secret == NULL) return XXH_ERROR;
+  if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+  XXH3_64bits_reset_internal(statePtr, 0, secret, secretSize);
+  return XXH_OK;
 }
 
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_64bits_reset_internal(statePtr, seed, kSecret, XXH_SECRET_DEFAULT_SIZE);
-    XXH3_initKeySeed(statePtr->customSecret, seed);
-    statePtr->secret = statePtr->customSecret;
-    return XXH_OK;
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr,
+                                                        XXH64_hash_t seed) {
+  if (statePtr == NULL) return XXH_ERROR;
+  XXH3_64bits_reset_internal(statePtr, seed, kSecret, XXH_SECRET_DEFAULT_SIZE);
+  XXH3_initKeySeed(statePtr->customSecret, seed);
+  statePtr->secret = statePtr->customSecret;
+  return XXH_OK;
 }
 
-XXH_FORCE_INLINE void
-XXH3_consumeStripes( U64* acc,
-                            XXH32_hash_t* nbStripesSoFarPtr, XXH32_hash_t nbStripesPerBlock,
-                            const void* data, size_t totalStripes,
-                            const void* secret, size_t secretLimit,
-                            XXH3_accWidth_e accWidth)
-{
-    XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
-    if (nbStripesPerBlock - *nbStripesSoFarPtr <= totalStripes) {
-        /* need a scrambling operation */
-        size_t const nbStripes = nbStripesPerBlock - *nbStripesSoFarPtr;
-        XXH3_accumulate(acc, data, (const char*)secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, accWidth);
-        XXH3_scrambleAcc(acc, (const char*)secret + secretLimit);
-        XXH3_accumulate(acc, (const char*)data + nbStripes * STRIPE_LEN, secret, totalStripes - nbStripes, accWidth);
-        *nbStripesSoFarPtr = (XXH32_hash_t)(totalStripes - nbStripes);
-    } else {
-        XXH3_accumulate(acc, data, (const char*)secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, totalStripes, accWidth);
-        *nbStripesSoFarPtr += (XXH32_hash_t)totalStripes;
-    }
+XXH_FORCE_INLINE void XXH3_consumeStripes(U64* acc, XXH32_hash_t* nbStripesSoFarPtr,
+                                          XXH32_hash_t nbStripesPerBlock,
+                                          const void* data, size_t totalStripes,
+                                          const void* secret, size_t secretLimit,
+                                          XXH3_accWidth_e accWidth) {
+  XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
+  if (nbStripesPerBlock - *nbStripesSoFarPtr <= totalStripes) {
+    /* need a scrambling operation */
+    size_t const nbStripes = nbStripesPerBlock - *nbStripesSoFarPtr;
+    XXH3_accumulate(acc, data,
+                    (const char*)secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE,
+                    nbStripes, accWidth);
+    XXH3_scrambleAcc(acc, (const char*)secret + secretLimit);
+    XXH3_accumulate(acc, (const char*)data + nbStripes * STRIPE_LEN, secret,
+                    totalStripes - nbStripes, accWidth);
+    *nbStripesSoFarPtr = (XXH32_hash_t)(totalStripes - nbStripes);
+  } else {
+    XXH3_accumulate(acc, data,
+                    (const char*)secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE,
+                    totalStripes, accWidth);
+    *nbStripesSoFarPtr += (XXH32_hash_t)totalStripes;
+  }
 }
 
-XXH_FORCE_INLINE XXH_errorcode
-XXH3_update(XXH3_state_t* state, const void* input, size_t len, XXH3_accWidth_e accWidth)
-{
-    if (input==NULL)
-#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
-        return XXH_OK;
+XXH_FORCE_INLINE XXH_errorcode XXH3_update(XXH3_state_t* state, const void* input,
+                                           size_t len, XXH3_accWidth_e accWidth) {
+  if (input == NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER >= 1)
+    return XXH_OK;
 #else
-        return XXH_ERROR;
+    return XXH_ERROR;
 #endif
 
-    {   const BYTE* p = (const BYTE*)input;
-        const BYTE* const bEnd = p + len;
+  {
+    const BYTE* p = (const BYTE*)input;
+    const BYTE* const bEnd = p + len;
 
-        state->totalLen += len;
+    state->totalLen += len;
 
-        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {  /* fill in tmp buffer */
-            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
-            state->bufferedSize += (XXH32_hash_t)len;
-            return XXH_OK;
-        }
-        /* input now > XXH3_INTERNALBUFFER_SIZE */
-
-        #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / STRIPE_LEN)
-        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % STRIPE_LEN == 0);   /* clean multiple */
-
-        if (state->bufferedSize) {   /* some data within internal buffer: fill then consume it */
-            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
-            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
-            p += loadSize;
-            XXH3_consumeStripes(state->acc,
-                               &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
-                                state->secret, state->secretLimit,
-                                accWidth);
-            state->bufferedSize = 0;
-        }
+    if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */
+      XXH_memcpy(state->buffer + state->bufferedSize, input, len);
+      state->bufferedSize += (XXH32_hash_t)len;
+      return XXH_OK;
+    }
+    /* input now > XXH3_INTERNALBUFFER_SIZE */
+
+#define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / STRIPE_LEN)
+    XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % STRIPE_LEN == 0); /* clean multiple */
+
+    if (state->bufferedSize) { /* some data within internal buffer: fill then
+                                  consume it */
+      size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
+      XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
+      p += loadSize;
+      XXH3_consumeStripes(state->acc, &state->nbStripesSoFar, state->nbStripesPerBlock,
+                          state->buffer, XXH3_INTERNALBUFFER_STRIPES, state->secret,
+                          state->secretLimit, accWidth);
+      state->bufferedSize = 0;
+    }
 
-        /* consume input by full buffer quantities */
-        if (p+XXH3_INTERNALBUFFER_SIZE <= bEnd) {
-            const BYTE* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
-            do {
-                XXH3_consumeStripes(state->acc,
-                                   &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                    p, XXH3_INTERNALBUFFER_STRIPES,
-                                    state->secret, state->secretLimit,
-                                    accWidth);
-                p += XXH3_INTERNALBUFFER_SIZE;
-            } while (p<=limit);
-        }
+    /* consume input by full buffer quantities */
+    if (p + XXH3_INTERNALBUFFER_SIZE <= bEnd) {
+      const BYTE* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
+      do {
+        XXH3_consumeStripes(state->acc, &state->nbStripesSoFar, state->nbStripesPerBlock,
+                            p, XXH3_INTERNALBUFFER_STRIPES, state->secret,
+                            state->secretLimit, accWidth);
+        p += XXH3_INTERNALBUFFER_SIZE;
+      } while (p <= limit);
+    }
 
-        if (p < bEnd) { /* some remaining input data : buffer it */
-            XXH_memcpy(state->buffer, p, (size_t)(bEnd-p));
-            state->bufferedSize = (XXH32_hash_t)(bEnd-p);
-        }
+    if (p < bEnd) { /* some remaining input data : buffer it */
+      XXH_memcpy(state->buffer, p, (size_t)(bEnd - p));
+      state->bufferedSize = (XXH32_hash_t)(bEnd - p);
     }
+  }
 
-    return XXH_OK;
+  return XXH_OK;
 }
 
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
-{
-    return XXH3_update(state, input, len, XXH3_acc_64bits);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH3_state_t* state, const void* input,
+                                                size_t len) {
+  return XXH3_update(state, input, len, XXH3_acc_64bits);
 }
 
-
-XXH_FORCE_INLINE void
-XXH3_digest_long (XXH64_hash_t* acc, const XXH3_state_t* state, XXH3_accWidth_e accWidth)
-{
-    memcpy(acc, state->acc, sizeof(state->acc));  /* digest locally, state remains unaltered, and can continue ingesting more data afterwards */
-    if (state->bufferedSize >= STRIPE_LEN) {
-        size_t const totalNbStripes = state->bufferedSize / STRIPE_LEN;
-        XXH32_hash_t nbStripesSoFar = state->nbStripesSoFar;
-        XXH3_consumeStripes(acc,
-                           &nbStripesSoFar, state->nbStripesPerBlock,
-                            state->buffer, totalNbStripes,
-                            state->secret, state->secretLimit,
-                            accWidth);
-        if (state->bufferedSize % STRIPE_LEN) {  /* one last partial stripe */
-            XXH3_accumulate_512(acc,
-                                state->buffer + state->bufferedSize - STRIPE_LEN,
-                   (const char*)state->secret + state->secretLimit - XXH_SECRET_LASTACC_START,
-                                accWidth);
-        }
-    } else {  /* bufferedSize < STRIPE_LEN */
-        if (state->bufferedSize) { /* one last stripe */
-            char lastStripe[STRIPE_LEN];
-            size_t const catchupSize = STRIPE_LEN - state->bufferedSize;
-            memcpy(lastStripe, (const char*)state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
-            memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
-            XXH3_accumulate_512(acc,
-                                lastStripe,
-                   (const char*)state->secret + state->secretLimit - XXH_SECRET_LASTACC_START,
-                                accWidth);
-    }   }
+XXH_FORCE_INLINE void XXH3_digest_long(XXH64_hash_t* acc, const XXH3_state_t* state,
+                                       XXH3_accWidth_e accWidth) {
+  memcpy(acc, state->acc,
+         sizeof(state->acc)); /* digest locally, state remains unaltered, and
+                                 can continue ingesting more data afterwards */
+  if (state->bufferedSize >= STRIPE_LEN) {
+    size_t const totalNbStripes = state->bufferedSize / STRIPE_LEN;
+    XXH32_hash_t nbStripesSoFar = state->nbStripesSoFar;
+    XXH3_consumeStripes(acc, &nbStripesSoFar, state->nbStripesPerBlock, state->buffer,
+                        totalNbStripes, state->secret, state->secretLimit, accWidth);
+    if (state->bufferedSize % STRIPE_LEN) { /* one last partial stripe */
+      XXH3_accumulate_512(
+          acc, state->buffer + state->bufferedSize - STRIPE_LEN,
+          (const char*)state->secret + state->secretLimit - XXH_SECRET_LASTACC_START,
+          accWidth);
+    }
+  } else {                     /* bufferedSize < STRIPE_LEN */
+    if (state->bufferedSize) { /* one last stripe */
+      char lastStripe[STRIPE_LEN];
+      size_t const catchupSize = STRIPE_LEN - state->bufferedSize;
+      memcpy(lastStripe, (const char*)state->buffer + sizeof(state->buffer) - catchupSize,
+             catchupSize);
+      memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+      XXH3_accumulate_512(
+          acc, lastStripe,
+          (const char*)state->secret + state->secretLimit - XXH_SECRET_LASTACC_START,
+          accWidth);
+    }
+  }
 }
 
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
-{
-    if (state->totalLen > XXH3_MIDSIZE_MAX) {
-        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[ACC_NB];
-        XXH3_digest_long(acc, state, XXH3_acc_64bits);
-        return XXH3_mergeAccs(acc, (const char*)state->secret + XXH_SECRET_MERGEACCS_START, (U64)state->totalLen * PRIME64_1);
-    }
-    /* len <= XXH3_MIDSIZE_MAX : short code */
-    if (state->seed)
-        return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-    return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), state->secret, state->secretLimit + STRIPE_LEN);
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(const XXH3_state_t* state) {
+  if (state->totalLen > XXH3_MIDSIZE_MAX) {
+    XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[ACC_NB];
+    XXH3_digest_long(acc, state, XXH3_acc_64bits);
+    return XXH3_mergeAccs(acc, (const char*)state->secret + XXH_SECRET_MERGEACCS_START,
+                          (U64)state->totalLen * PRIME64_1);
+  }
+  /* len <= XXH3_MIDSIZE_MAX : short code */
+  if (state->seed)
+    return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+  return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), state->secret,
+                                state->secretLimit + STRIPE_LEN);
 }
 
 /* ==========================================
  * XXH3 128 bits (=> XXH128)
  * ========================================== */
 
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_1to3_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(data != NULL);
-    XXH_ASSERT(1 <= len && len <= 3);
-    XXH_ASSERT(keyPtr != NULL);
-    {   const U32* const key32 = (const U32*) keyPtr;
-        BYTE const c1 = ((const BYTE*)data)[0];
-        BYTE const c2 = ((const BYTE*)data)[len >> 1];
-        BYTE const c3 = ((const BYTE*)data)[len - 1];
-        U32  const combinedl = ((U32)c1) + (((U32)c2) << 8) + (((U32)c3) << 16) + (((U32)len) << 24);
-        U32  const combinedh = XXH_swap32(combinedl);
-        U64  const keyedl = (U64)combinedl ^ (XXH_readLE32(key32)   + seed);
-        U64  const keyedh = (U64)combinedh ^ (XXH_readLE32(key32+1) - seed);
-        U64  const mixedl = keyedl * PRIME64_1;
-        U64  const mixedh = keyedh * PRIME64_2;
-        XXH128_hash_t const h128 = { XXH3_avalanche(mixedl) /*low64*/, XXH3_avalanche(mixedh) /*high64*/ };
-        return h128;
-    }
+XXH_FORCE_INLINE XXH128_hash_t XXH3_len_1to3_128b(const void* data, size_t len,
+                                                  const void* keyPtr, XXH64_hash_t seed) {
+  XXH_ASSERT(data != NULL);
+  XXH_ASSERT(1 <= len && len <= 3);
+  XXH_ASSERT(keyPtr != NULL);
+  {
+    const U32* const key32 = (const U32*)keyPtr;
+    BYTE const c1 = ((const BYTE*)data)[0];
+    BYTE const c2 = ((const BYTE*)data)[len >> 1];
+    BYTE const c3 = ((const BYTE*)data)[len - 1];
+    U32 const combinedl =
+        ((U32)c1) + (((U32)c2) << 8) + (((U32)c3) << 16) + (((U32)len) << 24);
+    U32 const combinedh = XXH_swap32(combinedl);
+    U64 const keyedl = (U64)combinedl ^ (XXH_readLE32(key32) + seed);
+    U64 const keyedh = (U64)combinedh ^ (XXH_readLE32(key32 + 1) - seed);
+    U64 const mixedl = keyedl * PRIME64_1;
+    U64 const mixedh = keyedh * PRIME64_2;
+    XXH128_hash_t const h128 = {XXH3_avalanche(mixedl) /*low64*/,
+                                XXH3_avalanche(mixedh) /*high64*/};
+    return h128;
+  }
 }
 
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_4to8_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(data != NULL);
-    XXH_ASSERT(keyPtr != NULL);
-    XXH_ASSERT(4 <= len && len <= 8);
-    {   U32 const in1 = XXH_readLE32(data);
-        U32 const in2 = XXH_readLE32((const BYTE*)data + len - 4);
-        U64 const in64l = in1 + ((U64)in2 << 32);
-        U64 const in64h = XXH_swap64(in64l);
-        U64 const keyedl = in64l ^ (XXH_readLE64(keyPtr) + seed);
-        U64 const keyedh = in64h ^ (XXH_readLE64((const char*)keyPtr + 8) - seed);
-        U64 const mix64l1 = len + ((keyedl ^ (keyedl >> 51)) * PRIME32_1);
-        U64 const mix64l2 = (mix64l1 ^ (mix64l1 >> 47)) * PRIME64_2;
-        U64 const mix64h1 = ((keyedh ^ (keyedh >> 47)) * PRIME64_1) - len;
-        U64 const mix64h2 = (mix64h1 ^ (mix64h1 >> 43)) * PRIME64_4;
-        {   XXH128_hash_t const h128 = { XXH3_avalanche(mix64l2) /*low64*/, XXH3_avalanche(mix64h2) /*high64*/ };
-            return h128;
-    }   }
+XXH_FORCE_INLINE XXH128_hash_t XXH3_len_4to8_128b(const void* data, size_t len,
+                                                  const void* keyPtr, XXH64_hash_t seed) {
+  XXH_ASSERT(data != NULL);
+  XXH_ASSERT(keyPtr != NULL);
+  XXH_ASSERT(4 <= len && len <= 8);
+  {
+    U32 const in1 = XXH_readLE32(data);
+    U32 const in2 = XXH_readLE32((const BYTE*)data + len - 4);
+    U64 const in64l = in1 + ((U64)in2 << 32);
+    U64 const in64h = XXH_swap64(in64l);
+    U64 const keyedl = in64l ^ (XXH_readLE64(keyPtr) + seed);
+    U64 const keyedh = in64h ^ (XXH_readLE64((const char*)keyPtr + 8) - seed);
+    U64 const mix64l1 = len + ((keyedl ^ (keyedl >> 51)) * PRIME32_1);
+    U64 const mix64l2 = (mix64l1 ^ (mix64l1 >> 47)) * PRIME64_2;
+    U64 const mix64h1 = ((keyedh ^ (keyedh >> 47)) * PRIME64_1) - len;
+    U64 const mix64h2 = (mix64h1 ^ (mix64h1 >> 43)) * PRIME64_4;
+    {
+      XXH128_hash_t const h128 = {XXH3_avalanche(mix64l2) /*low64*/,
+                                  XXH3_avalanche(mix64h2) /*high64*/};
+      return h128;
+    }
+  }
 }
 
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_9to16_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(data != NULL);
-    XXH_ASSERT(keyPtr != NULL);
-    XXH_ASSERT(9 <= len && len <= 16);
-    {   const U64* const key64 = (const U64*) keyPtr;
-        U64 const ll1 = XXH_readLE64(data) ^ (XXH_readLE64(key64) + seed);
-        U64 const ll2 = XXH_readLE64((const BYTE*)data + len - 8) ^ (XXH_readLE64(key64+1) - seed);
-        U64 const inlow = ll1 ^ ll2;
-        XXH128_hash_t m128 = XXH3_mul128(inlow, PRIME64_1);
-        m128.high64 += ll2 * PRIME64_1;
-        m128.low64  ^= (m128.high64 >> 32);
-        {   XXH128_hash_t h128 = XXH3_mul128(m128.low64, PRIME64_2);
-            h128.high64 += m128.high64 * PRIME64_2;
-            h128.low64   = XXH3_avalanche(h128.low64);
-            h128.high64  = XXH3_avalanche(h128.high64);
-            return h128;
-    }   }
+XXH_FORCE_INLINE XXH128_hash_t XXH3_len_9to16_128b(const void* data, size_t len,
+                                                   const void* keyPtr,
+                                                   XXH64_hash_t seed) {
+  XXH_ASSERT(data != NULL);
+  XXH_ASSERT(keyPtr != NULL);
+  XXH_ASSERT(9 <= len && len <= 16);
+  {
+    const U64* const key64 = (const U64*)keyPtr;
+    U64 const ll1 = XXH_readLE64(data) ^ (XXH_readLE64(key64) + seed);
+    U64 const ll2 =
+        XXH_readLE64((const BYTE*)data + len - 8) ^ (XXH_readLE64(key64 + 1) - seed);
+    U64 const inlow = ll1 ^ ll2;
+    XXH128_hash_t m128 = XXH3_mul128(inlow, PRIME64_1);
+    m128.high64 += ll2 * PRIME64_1;
+    m128.low64 ^= (m128.high64 >> 32);
+    {
+      XXH128_hash_t h128 = XXH3_mul128(m128.low64, PRIME64_2);
+      h128.high64 += m128.high64 * PRIME64_2;
+      h128.low64 = XXH3_avalanche(h128.low64);
+      h128.high64 = XXH3_avalanche(h128.high64);
+      return h128;
+    }
+  }
 }
 
 /* Assumption : `secret` size is >= 16
  * Note : it should be >= XXH3_SECRET_SIZE_MIN anyway */
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_0to16_128b(const void* data, size_t len, const void* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(len <= 16);
-    {   if (len > 8) return XXH3_len_9to16_128b(data, len, secret, seed);
-        if (len >= 4) return XXH3_len_4to8_128b(data, len, secret, seed);
-        if (len) return XXH3_len_1to3_128b(data, len, secret, seed);
-        {   XXH128_hash_t const h128 = { 0, 0 };
-            return h128;
-    }   }
+XXH_FORCE_INLINE XXH128_hash_t XXH3_len_0to16_128b(const void* data, size_t len,
+                                                   const void* secret,
+                                                   XXH64_hash_t seed) {
+  XXH_ASSERT(len <= 16);
+  {
+    if (len > 8) return XXH3_len_9to16_128b(data, len, secret, seed);
+    if (len >= 4) return XXH3_len_4to8_128b(data, len, secret, seed);
+    if (len) return XXH3_len_1to3_128b(data, len, secret, seed);
+    {
+      XXH128_hash_t const h128 = {0, 0};
+      return h128;
+    }
+  }
 }
 
 XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT data, size_t len,
-                            const void* XXH_RESTRICT secret, size_t secretSize)
-{
-    XXH_ALIGN(XXH_ACC_ALIGN) U64 acc[ACC_NB] = XXH3_INIT_ACC;
-
-    XXH3_hashLong_internal_loop(acc, data, len, secret, secretSize, XXH3_acc_128bits);
-
-    /* converge into final hash */
-    XXH_STATIC_ASSERT(sizeof(acc) == 64);
-    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-    {   U64 const low64 = XXH3_mergeAccs(acc, (const char*)secret + XXH_SECRET_MERGEACCS_START, (U64)len * PRIME64_1);
-        U64 const high64 = XXH3_mergeAccs(acc, (const char*)secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START, ~((U64)len * PRIME64_2));
-        XXH128_hash_t const h128 = { low64, high64 };
-        return h128;
-    }
+                            const void* XXH_RESTRICT secret, size_t secretSize) {
+  XXH_ALIGN(XXH_ACC_ALIGN) U64 acc[ACC_NB] = XXH3_INIT_ACC;
+
+  XXH3_hashLong_internal_loop(acc, data, len, secret, secretSize, XXH3_acc_128bits);
+
+  /* converge into final hash */
+  XXH_STATIC_ASSERT(sizeof(acc) == 64);
+  XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+  {
+    U64 const low64 = XXH3_mergeAccs(
+        acc, (const char*)secret + XXH_SECRET_MERGEACCS_START, (U64)len * PRIME64_1);
+    U64 const high64 = XXH3_mergeAccs(
+        acc, (const char*)secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+        ~((U64)len * PRIME64_2));
+    XXH128_hash_t const h128 = {low64, high64};
+    return h128;
+  }
 }
 
-XXH_NO_INLINE XXH128_hash_t    /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */
-XXH3_hashLong_128b_defaultSecret(const void* data, size_t len)
-{
-    return XXH3_hashLong_128b_internal(data, len, kSecret, sizeof(kSecret));
+XXH_NO_INLINE XXH128_hash_t /* It's important for performance that XXH3_hashLong
+                               is not inlined. Not sure why (uop cache maybe ?),
+                               but difference is large and easily measurable */
+XXH3_hashLong_128b_defaultSecret(const void* data, size_t len) {
+  return XXH3_hashLong_128b_internal(data, len, kSecret, sizeof(kSecret));
 }
 
-XXH_NO_INLINE XXH128_hash_t    /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */
-XXH3_hashLong_128b_withSecret(const void* data, size_t len,
-                              const void* secret, size_t secretSize)
-{
-    return XXH3_hashLong_128b_internal(data, len, secret, secretSize);
+XXH_NO_INLINE XXH128_hash_t /* It's important for performance that XXH3_hashLong
+                               is not inlined. Not sure why (uop cache maybe ?),
+                               but difference is large and easily measurable */
+XXH3_hashLong_128b_withSecret(const void* data, size_t len, const void* secret,
+                              size_t secretSize) {
+  return XXH3_hashLong_128b_internal(data, len, secret, secretSize);
 }
 
-XXH_NO_INLINE XXH128_hash_t    /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */
-XXH3_hashLong_128b_withSeed(const void* data, size_t len, XXH64_hash_t seed)
-{
-    XXH_ALIGN(8) char secret[XXH_SECRET_DEFAULT_SIZE];
-    if (seed == 0) return XXH3_hashLong_128b_defaultSecret(data, len);
-    XXH3_initKeySeed(secret, seed);
-    return XXH3_hashLong_128b_internal(data, len, secret, sizeof(secret));
+XXH_NO_INLINE XXH128_hash_t /* It's important for performance that XXH3_hashLong
+                               is not inlined. Not sure why (uop cache maybe ?),
+                               but difference is large and easily measurable */
+XXH3_hashLong_128b_withSeed(const void* data, size_t len, XXH64_hash_t seed) {
+  XXH_ALIGN(8) char secret[XXH_SECRET_DEFAULT_SIZE];
+  if (seed == 0) return XXH3_hashLong_128b_defaultSecret(data, len);
+  XXH3_initKeySeed(secret, seed);
+  return XXH3_hashLong_128b_internal(data, len, secret, sizeof(secret));
 }
 
-XXH_NO_INLINE XXH128_hash_t
-XXH3_len_129to240_128b(const void* XXH_RESTRICT data, size_t len,
-                      const void* XXH_RESTRICT secret, size_t secretSize,
-                      XXH64_hash_t seed)
-{
-    const BYTE* const p = (const BYTE*)data;
-    const char* const key = (const char*)secret;
-
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-
-    {   U64 acc1 = len * PRIME64_1;
-        U64 acc2 = 0;
-        int const nbRounds = (int)len / 32;
-        int i;
-        for (i=0; i<4; i++) {
-            acc1 += XXH3_mix16B(p+(32*i),    key+(32*i),     seed);
-            acc2 += XXH3_mix16B(p+(32*i)+16, key+(32*i)+16, -seed);
-        }
-        acc1 = XXH3_avalanche(acc1);
-        acc2 = XXH3_avalanche(acc2);
-        XXH_ASSERT(nbRounds >= 4);
-        for (i=4 ; i < nbRounds; i++) {
-            acc1 += XXH3_mix16B(p+(32*i)   , key+(32*(i-4))    + XXH3_MIDSIZE_STARTOFFSET,  seed);
-            acc2 += XXH3_mix16B(p+(32*i)+16, key+(32*(i-4))+16 + XXH3_MIDSIZE_STARTOFFSET, -seed);
-        }
-        /* last bytes */
-        acc1 += XXH3_mix16B(p + len - 16, key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET     ,  seed);
-        acc2 += XXH3_mix16B(p + len - 32, key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, -seed);
-
-        {   U64 const low64 = acc1 + acc2;
-            U64 const high64 = (acc1 * PRIME64_1) + (acc2 * PRIME64_4) + ((len - seed) * PRIME64_2);
-            XXH128_hash_t const h128 = { XXH3_avalanche(low64), (XXH64_hash_t)0 - XXH3_avalanche(high64) };
-            return h128;
-        }
+XXH_NO_INLINE XXH128_hash_t XXH3_len_129to240_128b(const void* XXH_RESTRICT data,
+                                                   size_t len,
+                                                   const void* XXH_RESTRICT secret,
+                                                   size_t secretSize, XXH64_hash_t seed) {
+  const BYTE* const p = (const BYTE*)data;
+  const char* const key = (const char*)secret;
+
+  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+  (void)secretSize;
+  XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+  {
+    U64 acc1 = len * PRIME64_1;
+    U64 acc2 = 0;
+    int const nbRounds = (int)len / 32;
+    int i;
+    for (i = 0; i < 4; i++) {
+      acc1 += XXH3_mix16B(p + (32 * i), key + (32 * i), seed);
+      acc2 += XXH3_mix16B(p + (32 * i) + 16, key + (32 * i) + 16, -seed);
+    }
+    acc1 = XXH3_avalanche(acc1);
+    acc2 = XXH3_avalanche(acc2);
+    XXH_ASSERT(nbRounds >= 4);
+    for (i = 4; i < nbRounds; i++) {
+      acc1 += XXH3_mix16B(p + (32 * i), key + (32 * (i - 4)) + XXH3_MIDSIZE_STARTOFFSET,
+                          seed);
+      acc2 += XXH3_mix16B(p + (32 * i) + 16,
+                          key + (32 * (i - 4)) + 16 + XXH3_MIDSIZE_STARTOFFSET, -seed);
+    }
+    /* last bytes */
+    acc1 += XXH3_mix16B(p + len - 16,
+                        key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
+    acc2 += XXH3_mix16B(p + len - 32,
+                        key + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, -seed);
+
+    {
+      U64 const low64 = acc1 + acc2;
+      U64 const high64 =
+          (acc1 * PRIME64_1) + (acc2 * PRIME64_4) + ((len - seed) * PRIME64_2);
+      XXH128_hash_t const h128 = {XXH3_avalanche(low64),
+                                  (XXH64_hash_t)0 - XXH3_avalanche(high64)};
+      return h128;
     }
+  }
 }
 
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_len_17to128_128b(const void* XXH_RESTRICT data, size_t len,
-                     const void* XXH_RESTRICT secret, size_t secretSize,
-                     XXH64_hash_t seed)
-{
-    const BYTE* const p = (const BYTE*)data;
-    const char* const key = (const char*)secret;
-
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(16 < len && len <= 128);
-
-    {   U64 acc1 = len * PRIME64_1;
-        U64 acc2 = 0;
-        if (len > 32) {
-            if (len > 64) {
-                if (len > 96) {
-                    acc1 += XXH3_mix16B(p+48, key+96, seed);
-                    acc2 += XXH3_mix16B(p+len-64, key+112, seed);
-                }
-                acc1 += XXH3_mix16B(p+32, key+64, seed);
-                acc2 += XXH3_mix16B(p+len-48, key+80, seed);
-            }
-            acc1 += XXH3_mix16B(p+16, key+32, seed);
-            acc2 += XXH3_mix16B(p+len-32, key+48, seed);
+XXH_FORCE_INLINE XXH128_hash_t XXH3_len_17to128_128b(const void* XXH_RESTRICT data,
+                                                     size_t len,
+                                                     const void* XXH_RESTRICT secret,
+                                                     size_t secretSize,
+                                                     XXH64_hash_t seed) {
+  const BYTE* const p = (const BYTE*)data;
+  const char* const key = (const char*)secret;
+
+  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+  (void)secretSize;
+  XXH_ASSERT(16 < len && len <= 128);
+
+  {
+    U64 acc1 = len * PRIME64_1;
+    U64 acc2 = 0;
+    if (len > 32) {
+      if (len > 64) {
+        if (len > 96) {
+          acc1 += XXH3_mix16B(p + 48, key + 96, seed);
+          acc2 += XXH3_mix16B(p + len - 64, key + 112, seed);
         }
-        acc1 += XXH3_mix16B(p+0, key+0, seed);
-        acc2 += XXH3_mix16B(p+len-16, key+16, seed);
+        acc1 += XXH3_mix16B(p + 32, key + 64, seed);
+        acc2 += XXH3_mix16B(p + len - 48, key + 80, seed);
+      }
+      acc1 += XXH3_mix16B(p + 16, key + 32, seed);
+      acc2 += XXH3_mix16B(p + len - 32, key + 48, seed);
+    }
+    acc1 += XXH3_mix16B(p + 0, key + 0, seed);
+    acc2 += XXH3_mix16B(p + len - 16, key + 16, seed);
 
-        {   U64 const low64 = acc1 + acc2;
-            U64 const high64 = (acc1 * PRIME64_1) + (acc2 * PRIME64_4) + ((len - seed) * PRIME64_2);
-            XXH128_hash_t const h128 = { XXH3_avalanche(low64), (XXH64_hash_t)0 - XXH3_avalanche(high64) };
-            return h128;
-        }
+    {
+      U64 const low64 = acc1 + acc2;
+      U64 const high64 =
+          (acc1 * PRIME64_1) + (acc2 * PRIME64_4) + ((len - seed) * PRIME64_2);
+      XXH128_hash_t const h128 = {XXH3_avalanche(low64),
+                                  (XXH64_hash_t)0 - XXH3_avalanche(high64)};
+      return h128;
     }
+  }
 }
 
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len)
-{
-    if (len <= 16) return XXH3_len_0to16_128b(data, len, kSecret, 0);
-    if (len <= 128) return XXH3_len_17to128_128b(data, len, kSecret, sizeof(kSecret), 0);
-    if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b(data, len, kSecret, sizeof(kSecret), 0);
-    return XXH3_hashLong_128b_defaultSecret(data, len);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len) {
+  if (len <= 16) return XXH3_len_0to16_128b(data, len, kSecret, 0);
+  if (len <= 128) return XXH3_len_17to128_128b(data, len, kSecret, sizeof(kSecret), 0);
+  if (len <= XXH3_MIDSIZE_MAX)
+    return XXH3_len_129to240_128b(data, len, kSecret, sizeof(kSecret), 0);
+  return XXH3_hashLong_128b_defaultSecret(data, len);
 }
 
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-    /* if an action must be taken should `secret` conditions not be respected,
-     * it should be done here.
-     * For now, it's a contract pre-condition.
-     * Adding a check and a branch here would cost performance at every hash */
-     if (len <= 16) return XXH3_len_0to16_128b(data, len, secret, 0);
-     if (len <= 128) return XXH3_len_17to128_128b(data, len, secret, secretSize, 0);
-     if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b(data, len, secret, secretSize, 0);
-     return XXH3_hashLong_128b_withSecret(data, len, secret, secretSize);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len,
+                                                     const void* secret,
+                                                     size_t secretSize) {
+  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+  /* if an action must be taken should `secret` conditions not be respected,
+   * it should be done here.
+   * For now, it's a contract pre-condition.
+   * Adding a check and a branch here would cost performance at every hash */
+  if (len <= 16) return XXH3_len_0to16_128b(data, len, secret, 0);
+  if (len <= 128) return XXH3_len_17to128_128b(data, len, secret, secretSize, 0);
+  if (len <= XXH3_MIDSIZE_MAX)
+    return XXH3_len_129to240_128b(data, len, secret, secretSize, 0);
+  return XXH3_hashLong_128b_withSecret(data, len, secret, secretSize);
 }
 
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed)
-{
-    if (len <= 16) return XXH3_len_0to16_128b(data, len, kSecret, seed);
-    if (len <= 128) return XXH3_len_17to128_128b(data, len, kSecret, sizeof(kSecret), seed);
-    if (len <= XXH3_MIDSIZE_MAX) return XXH3_len_129to240_128b(data, len, kSecret, sizeof(kSecret), seed);
-    return XXH3_hashLong_128b_withSeed(data, len, seed);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len,
+                                                   XXH64_hash_t seed) {
+  if (len <= 16) return XXH3_len_0to16_128b(data, len, kSecret, seed);
+  if (len <= 128) return XXH3_len_17to128_128b(data, len, kSecret, sizeof(kSecret), seed);
+  if (len <= XXH3_MIDSIZE_MAX)
+    return XXH3_len_129to240_128b(data, len, kSecret, sizeof(kSecret), seed);
+  return XXH3_hashLong_128b_withSeed(data, len, seed);
 }
 
-XXH_PUBLIC_API XXH128_hash_t
-XXH128(const void* data, size_t len, XXH64_hash_t seed)
-{
-    return XXH3_128bits_withSeed(data, len, seed);
+XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed) {
+  return XXH3_128bits_withSeed(data, len, seed);
 }
 
-
 /* ===   XXH3 128-bit streaming   === */
 
 /* all the functions are actually the same as for 64-bit streaming variant,
    just the reset one is different (different initial acc values for 0,5,6,7),
    and near the end of the digest function */
 
-static void
-XXH3_128bits_reset_internal(XXH3_state_t* statePtr,
-                           XXH64_hash_t seed,
-                           const void* secret, size_t secretSize)
-{
-    XXH3_64bits_reset_internal(statePtr, seed, secret, secretSize);
+static void XXH3_128bits_reset_internal(XXH3_state_t* statePtr, XXH64_hash_t seed,
+                                        const void* secret, size_t secretSize) {
+  XXH3_64bits_reset_internal(statePtr, seed, secret, secretSize);
 }
 
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset(XXH3_state_t* statePtr)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_128bits_reset_internal(statePtr, 0, kSecret, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr) {
+  if (statePtr == NULL) return XXH_ERROR;
+  XXH3_128bits_reset_internal(statePtr, 0, kSecret, XXH_SECRET_DEFAULT_SIZE);
+  return XXH_OK;
 }
 
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_128bits_reset_internal(statePtr, 0, secret, secretSize);
-    if (secret == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-    return XXH_OK;
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr,
+                                                           const void* secret,
+                                                           size_t secretSize) {
+  if (statePtr == NULL) return XXH_ERROR;
+  XXH3_128bits_reset_internal(statePtr, 0, secret, secretSize);
+  if (secret == NULL) return XXH_ERROR;
+  if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+  return XXH_OK;
 }
 
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_128bits_reset_internal(statePtr, seed, kSecret, XXH_SECRET_DEFAULT_SIZE);
-    XXH3_initKeySeed(statePtr->customSecret, seed);
-    statePtr->secret = statePtr->customSecret;
-    return XXH_OK;
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr,
+                                                         XXH64_hash_t seed) {
+  if (statePtr == NULL) return XXH_ERROR;
+  XXH3_128bits_reset_internal(statePtr, seed, kSecret, XXH_SECRET_DEFAULT_SIZE);
+  XXH3_initKeySeed(statePtr->customSecret, seed);
+  statePtr->secret = statePtr->customSecret;
+  return XXH_OK;
 }
 
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
-{
-    return XXH3_update(state, input, len, XXH3_acc_128bits);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH3_state_t* state, const void* input,
+                                                 size_t len) {
+  return XXH3_update(state, input, len, XXH3_acc_128bits);
 }
 
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
-{
-    if (state->totalLen > XXH3_MIDSIZE_MAX) {
-        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[ACC_NB];
-        XXH3_digest_long(acc, state, XXH3_acc_128bits);
-        XXH_ASSERT(state->secretLimit + STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-        {   U64 const low64 = XXH3_mergeAccs(acc, (const char*)state->secret + XXH_SECRET_MERGEACCS_START, (U64)state->totalLen * PRIME64_1);
-            U64 const high64 = XXH3_mergeAccs(acc, (const char*)state->secret + state->secretLimit + STRIPE_LEN - sizeof(acc) - XXH_SECRET_MERGEACCS_START, ~((U64)state->totalLen * PRIME64_2));
-            XXH128_hash_t const h128 = { low64, high64 };
-            return h128;
-        }
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(const XXH3_state_t* state) {
+  if (state->totalLen > XXH3_MIDSIZE_MAX) {
+    XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[ACC_NB];
+    XXH3_digest_long(acc, state, XXH3_acc_128bits);
+    XXH_ASSERT(state->secretLimit + STRIPE_LEN >=
+               sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    {
+      U64 const low64 =
+          XXH3_mergeAccs(acc, (const char*)state->secret + XXH_SECRET_MERGEACCS_START,
+                         (U64)state->totalLen * PRIME64_1);
+      U64 const high64 =
+          XXH3_mergeAccs(acc,
+                         (const char*)state->secret + state->secretLimit + STRIPE_LEN -
+                             sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+                         ~((U64)state->totalLen * PRIME64_2));
+      XXH128_hash_t const h128 = {low64, high64};
+      return h128;
     }
-    /* len <= XXH3_MIDSIZE_MAX : short code */
-    if (state->seed)
-        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), state->secret, state->secretLimit + STRIPE_LEN);
+  }
+  /* len <= XXH3_MIDSIZE_MAX : short code */
+  if (state->seed)
+    return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+  return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), state->secret,
+                                 state->secretLimit + STRIPE_LEN);
 }
 
 /* 128-bit utility functions */
 
-#include <string.h>   /* memcmp */
+#include <string.h> /* memcmp */
 
 /* return : 1 is equal, 0 if different */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
-{
-    /* note : XXH128_hash_t is compact, it has no padding byte */
-    return !(memcmp(&h1, &h2, sizeof(h1)));
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2) {
+  /* note : XXH128_hash_t is compact, it has no padding byte */
+  return !(memcmp(&h1, &h2, sizeof(h1)));
 }
 
 /* This prototype is compatible with stdlib's qsort().
  * return : >0 if *h128_1  > *h128_2
  *          <0 if *h128_1  < *h128_2
  *          =0 if *h128_1 == *h128_2  */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
-{
-    XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
-    XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
-    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
-    /* note : bets that, in most cases, hash values are different */
-    if (hcmp) return hcmp;
-    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
+XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2) {
+  XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
+  XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
+  int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
+  /* note : bets that, in most cases, hash values are different */
+  if (hcmp) return hcmp;
+  return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
 }
 
-
 /*======   Canonical representation   ======*/
-XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
-{
-    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
-    if (XXH_CPU_LITTLE_ENDIAN) {
-        hash.high64 = XXH_swap64(hash.high64);
-        hash.low64  = XXH_swap64(hash.low64);
-    }
-    memcpy(dst, &hash.high64, sizeof(hash.high64));
-    memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst,
+                                             XXH128_hash_t hash) {
+  XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
+  if (XXH_CPU_LITTLE_ENDIAN) {
+    hash.high64 = XXH_swap64(hash.high64);
+    hash.low64 = XXH_swap64(hash.low64);
+  }
+  memcpy(dst, &hash.high64, sizeof(hash.high64));
+  memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
 }
 
-XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(const XXH128_canonical_t* src)
-{
-    XXH128_hash_t h;
-    h.high64 = XXH_readBE64(src);
-    h.low64  = XXH_readBE64(src->digest + 8);
-    return h;
+XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src) {
+  XXH128_hash_t h;
+  h.high64 = XXH_readBE64(src);
+  h.low64 = XXH_readBE64(src->digest + 8);
+  return h;
 }
 
-
-
-#endif  /* XXH3_H */
+#endif /* XXH3_H */
diff --git a/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxhash.c b/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxhash.c
index 675c0346b..a45d5b095 100644
--- a/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxhash.c
+++ b/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxhash.c
@@ -36,20 +36,22 @@
  *  Tuning parameters
  ***************************************/
 /*!XXH_FORCE_MEMORY_ACCESS :
- * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and
- * portable. Unfortunately, on some target/compiler combinations, the generated assembly
- * is sub-optimal. The below switch allow to select different access method for improved
- * performance. Method 0 (default) : use `memcpy()`. Safe and portable. Method 1 :
- * `__packed` statement. It depends on compiler extension (ie, not portable). This method
- * is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
- * Method 2 : direct access. This method doesn't depend on compiler but violate C
- * standard. It can generate buggy code on targets which do not support unaligned memory
- * accesses. But in some circumstances, it's the only known way to get the most
- * performance (ie GCC + ARMv6) See http://stackoverflow.com/a/32095106/646947 for
- * details. Prefer these methods in priority order (0 > 1 > 2)
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is
+ * safe and portable. Unfortunately, on some target/compiler combinations, the
+ * generated assembly is sub-optimal. The below switch allow to select different
+ * access method for improved performance. Method 0 (default) : use `memcpy()`.
+ * Safe and portable. Method 1 :
+ * `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or
+ * faster than `memcpy`. Method 2 : direct access. This method doesn't depend on
+ * compiler but violate C standard. It can generate buggy code on targets which
+ * do not support unaligned memory accesses. But in some circumstances, it's the
+ * only known way to get the most performance (ie GCC + ARMv6) See
+ * http://stackoverflow.com/a/32095106/646947 for details. Prefer these methods
+ * in priority order (0 > 1 > 2)
  */
-#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for \
-                                   example */
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line \
+                                   for example */
 #if defined(__GNUC__) &&                                                                \
     (defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \
      defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__))
@@ -63,9 +65,10 @@
 #endif
 
 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
- * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a
- * segfault. When this macro is enabled, xxHash actively checks input for null pointer. It
- * it is, result for null input pointers is the same as a null-length input.
+ * If input pointer is NULL, xxHash default behavior is to dereference it,
+ * triggering a segfault. When this macro is enabled, xxHash actively checks
+ * input for null pointer. If it is, result for null input pointers is the same
+ * as a null-length input.
  */
 #ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
 #define XXH_ACCEPT_NULL_INPUT_POINTER 0
@@ -102,8 +105,8 @@
 /* *************************************
  *  Includes & Memory related functions
  ***************************************/
-/*! Modify the local functions below should you wish to use some other memory routines
- *   for malloc(), free() */
+/*! Modify the local functions below should you wish to use some other memory
+ * routines for malloc(), free() */
 #include <stdlib.h>
 static void* XXH_malloc(size_t s) { return malloc(s); }
 static void XXH_free(void* p) { free(p); }
@@ -186,14 +189,14 @@ typedef unsigned int U32;
 
 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2))
 
-/* Force direct memory access. Only works on CPU which support unaligned memory access in
- * hardware */
+/* Force direct memory access. Only works on CPU which support unaligned memory
+ * access in hardware */
 static U32 XXH_read32(const void* memPtr) { return *(const U32*)memPtr; }
 
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1))
 
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for
- * some compilers */
+/* __pack instructions are safer, but compiler specific, hence potentially
+ * problematic for some compilers */
 /* currently only defined for gcc and icc */
 typedef union {
   U32 u32;
@@ -216,8 +219,8 @@ static U32 XXH_read32(const void* memPtr) {
 /* ===   Endianess   === */
 typedef enum { XXH_bigEndian = 0, XXH_littleEndian = 1 } XXH_endianess;
 
-/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command
- * line */
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler
+ * command line */
 #ifndef XXH_CPU_LITTLE_ENDIAN
 static int XXH_isLittleEndian(void) {
   const union {
@@ -242,7 +245,8 @@ static int XXH_isLittleEndian(void) {
     __has_builtin(__builtin_rotateleft64)
 #define XXH_rotl32 __builtin_rotateleft32
 #define XXH_rotl64 __builtin_rotateleft64
-/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems
+ * poor */
 #elif defined(_MSC_VER)
 #define XXH_rotl32(x, r) _rotl(x, r)
 #define XXH_rotl64(x, r) _rotl64(x, r)
@@ -311,10 +315,11 @@ static U32 XXH32_round(U32 acc, U32 input) {
    * The reason we want to avoid vectorization is because despite working on
    * 4 integers at a time, there are multiple factors slowing XXH32 down on
    * SSE4:
-   * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer
-   * chips!) making it slightly slower to multiply four integers at once compared to four
-   *   integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
-   *   still not worth it to go into SSE just to multiply unless doing a long operation.
+   * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
+   * newer chips!) making it slightly slower to multiply four integers at once
+   * compared to four integers independently. Even when pmulld was fastest,
+   * Sandy/Ivy Bridge, it is still not worth it to go into SSE just to multiply
+   * unless doing a long operation.
    *
    * - Four instructions are required to rotate,
    *      movqda tmp,  v // not required with VEX encoding
@@ -325,15 +330,15 @@ static U32 XXH32_round(U32 acc, U32 input) {
    *      roll   v, 13    // reliably fast across the board
    *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
    *
-   * - Instruction level parallelism is actually more beneficial here because the
-   *   SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
-   *   while v3 can multiply. SSE forces them to operate together.
+   * - Instruction level parallelism is actually more beneficial here because
+   * the SIMD actually serializes this operation: While v1 is rotating, v2 can
+   * load data, while v3 can multiply. SSE forces them to operate together.
    *
    * How this hack works:
-   * __asm__(""       // Declare an assembly block but don't declare any instructions
-   *          :       // However, as an Input/Output Operand,
-   *          "+r"    // constrain a read/write operand (+) as a general purpose register
-   * (r). (acc)   // and set acc as the operand
+   * __asm__(""     // Declare an assembly block but don't declare any instructions
+   *          :     // However, as an Input/Output Operand,
+   *          "+r"  // constrain a read/write (+) general purpose register (r)
+   *          (acc) // and set acc as the operand
    * );
    *
    * Because of the 'r', the compiler has promised that seed will be in a
@@ -624,10 +629,10 @@ XXH_PUBLIC_API unsigned int XXH32_digest(const XXH32_state_t* state) {
 /*======   Canonical representation   ======*/
 
 /*! Default XXH result types are basic unsigned 32 and 64 bits.
- *   The canonical representation follows human-readable write convention, aka big-endian
- * (large digits first). These functions allow transformation of hash result into and from
- * its canonical format. This way, hash values can be written into a file or buffer,
- * remaining comparable across different systems.
+ *   The canonical representation follows human-readable write convention, aka
+ * big-endian (large digits first). These functions allow transformation of hash
+ * result into and from its canonical format. This way, hash values can be
+ * written into a file or buffer, remaining comparable across different systems.
  */
 
 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) {
@@ -656,7 +661,8 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src
 #include <stdint.h>
 typedef uint64_t U64;
 #else
-/* if compiler doesn't support unsigned long long, replace by another 64-bit type */
+/* if compiler doesn't support unsigned long long, replace by another 64-bit
+ * type */
 typedef unsigned long long U64;
 #endif
 #endif
@@ -664,14 +670,15 @@ typedef unsigned long long U64;
 /*! XXH_REROLL_XXH64:
  * Whether to reroll the XXH64_finalize() loop.
  *
- * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a performance
- * gain on 64-bit hosts, as only one jump is required.
+ * Just like XXH32, we can unroll the XXH64_finalize() loop. This can be a
+ * performance gain on 64-bit hosts, as only one jump is required.
  *
  * However, on 32-bit hosts, because arithmetic needs to be done with two 32-bit
- * registers, and 64-bit arithmetic needs to be simulated, it isn't beneficial to unroll.
- * The code becomes ridiculously large (the largest function in the binary on i386!), and
- * rerolling it saves anywhere from 3kB to 20kB. It is also slightly faster because it
- * fits into cache better and is more likely to be inlined by the compiler.
+ * registers, and 64-bit arithmetic needs to be simulated, it isn't beneficial
+ * to unroll. The code becomes ridiculously large (the largest function in the
+ * binary on i386!), and rerolling it saves anywhere from 3kB to 20kB. It is
+ * also slightly faster because it fits into cache better and is more likely to
+ * be inlined by the compiler.
  *
  * If XXH_REROLL is defined, this is ignored and the loop is always rerolled. */
 #ifndef XXH_REROLL_XXH64
@@ -692,14 +699,14 @@ typedef unsigned long long U64;
 
 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 2))
 
-/* Force direct memory access. Only works on CPU which support unaligned memory access in
- * hardware */
+/* Force direct memory access. Only works on CPU which support unaligned memory
+ * access in hardware */
 static U64 XXH_read64(const void* memPtr) { return *(const U64*)memPtr; }
 
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS == 1))
 
-/* __pack instructions are safer, but compiler specific, hence potentially problematic for
- * some compilers */
+/* __pack instructions are safer, but compiler specific, hence potentially
+ * problematic for some compilers */
 /* currently only defined for gcc and icc */
 typedef union {
   U32 u32;
diff --git a/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxhash.h b/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxhash.h
index aac49531f..eb8cd66b9 100644
--- a/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxhash.h
+++ b/native-sql-engine/cpp/src/third_party/arrow/vendored/xxhash/xxhash.h
@@ -53,7 +53,8 @@
 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
 It also successfully passes all tests from the SMHasher suite.
 
-Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo
+@3GHz)
 
 Name            Speed       Q.Score   Author
 xxHash          5.4 GB/s     10
@@ -83,17 +84,15 @@ XXH32        6.8 GB/s            6.0 GB/s
 #ifndef XXHASH_H_5627135585666179
 #define XXHASH_H_5627135585666179 1
 
-#if defined (__cplusplus)
+#if defined(__cplusplus)
 extern "C" {
 #endif
 
-
 /* ****************************
-*  Definitions
-******************************/
-#include <stddef.h>   /* size_t */
-typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
-
+ *  Definitions
+ ******************************/
+#include <stddef.h> /* size_t */
+typedef enum { XXH_OK = 0, XXH_ERROR } XXH_errorcode;
 
 /* ****************************
  *  API modifier
@@ -103,240 +102,257 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
  *  in order to inline them, and remove their symbol from the public list.
  *  Inlining offers great performance improvement on small keys,
  *  and dramatic ones when length is expressed as a compile-time constant.
- *  See https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html .
- *  Methodology :
- *     #define XXH_INLINE_ALL
- *     #include "xxhash.h"
- * `xxhash.c` is automatically included.
- *  It's not useful to compile and link it as a separate object.
+ *  See
+ * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *  Methodology : `#define XXH_INLINE_ALL`, then `#include "xxhash.h"`.
+ *  `xxhash.c` is automatically included.
+ *  It's not useful to compile and link it as a separate object.
  */
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
-#  ifndef XXH_STATIC_LINKING_ONLY
-#    define XXH_STATIC_LINKING_ONLY
-#  endif
-#  if defined(__GNUC__)
-#    define XXH_PUBLIC_API static __inline __attribute__((unused))
-#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#    define XXH_PUBLIC_API static inline
-#  elif defined(_MSC_VER)
-#    define XXH_PUBLIC_API static __inline
-#  else
-     /* this version may generate warnings for unused static functions */
-#    define XXH_PUBLIC_API static
-#  endif
+#ifndef XXH_STATIC_LINKING_ONLY
+#define XXH_STATIC_LINKING_ONLY
+#endif
+#if defined(__GNUC__)
+#define XXH_PUBLIC_API static __inline __attribute__((unused))
+#elif defined(__cplusplus) || \
+    (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#define XXH_PUBLIC_API static inline
+#elif defined(_MSC_VER)
+#define XXH_PUBLIC_API static __inline
+#else
+/* this version may generate warnings for unused static functions */
+#define XXH_PUBLIC_API static
+#endif
+#else
+#if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+#ifdef XXH_EXPORT
+#define XXH_PUBLIC_API __declspec(dllexport)
+#elif XXH_IMPORT
+#define XXH_PUBLIC_API __declspec(dllimport)
+#endif
 #else
-#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
-#    ifdef XXH_EXPORT
-#      define XXH_PUBLIC_API __declspec(dllexport)
-#    elif XXH_IMPORT
-#      define XXH_PUBLIC_API __declspec(dllimport)
-#    endif
-#  else
-#    define XXH_PUBLIC_API   /* do nothing */
-#  endif
+#define XXH_PUBLIC_API /* do nothing */
+#endif
 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
 
 /*! XXH_NAMESPACE, aka Namespace Emulation :
  *
- * If you want to include _and expose_ xxHash functions from within your own library,
- * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash,
  *
- * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
- * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from
+ * xxhash library with the value of XXH_NAMESPACE (therefore, avoid NULL and
+ * numeric values).
  *
- * Note that no change is required within the calling program as long as it includes `xxhash.h` :
- * regular symbol name will be automatically translated by this header.
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h` : regular symbol name will be automatically translated by
+ * this header.
  */
 #ifdef XXH_NAMESPACE
-#  define XXH_CAT(A,B) A##B
-#  define XXH_NAME2(A,B) XXH_CAT(A,B)
-#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
-#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
-#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
-#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
-#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
-#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
-#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
-#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
-#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
-#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
-#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
-#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
-#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
-#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
-#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
-#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
-#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
-#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
-#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#define XXH_CAT(A, B) A##B
+#define XXH_NAME2(A, B) XXH_CAT(A, B)
+#define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+#define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+#define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+#define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+#define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+#define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+#define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
 #endif
 
-
 /* *************************************
-*  Version
-***************************************/
-#define XXH_VERSION_MAJOR    0
-#define XXH_VERSION_MINOR    7
-#define XXH_VERSION_RELEASE  1
-#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
-XXH_PUBLIC_API unsigned XXH_versionNumber (void);
-
+ *  Version
+ ***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 7
+#define XXH_VERSION_RELEASE 1
+#define XXH_VERSION_NUMBER \
+  (XXH_VERSION_MAJOR * 100 * 100 + XXH_VERSION_MINOR * 100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber(void);
 
 /*-**********************************************************************
-*  32-bit hash
-************************************************************************/
-#if !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   include <stdint.h>
-    typedef uint32_t XXH32_hash_t;
+ *  32-bit hash
+ ************************************************************************/
+#if !defined(__VMS) &&       \
+    (defined(__cplusplus) || \
+     (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
+#include <stdint.h>
+typedef uint32_t XXH32_hash_t;
 #else
-    typedef unsigned int XXH32_hash_t;
+typedef unsigned int XXH32_hash_t;
 #endif
 
 /*! XXH32() :
-    Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
-    The memory between input & input+length must be valid (allocated and read-accessible).
-    "seed" can be used to alter the result predictably.
-    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
-XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+    Calculate the 32-bit hash of sequence "length" bytes stored at memory
+   address "input". The memory between input & input+length must be valid
+   (allocated and read-accessible). "seed" can be used to alter the result
+   predictably.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+ */
+XXH_PUBLIC_API XXH32_hash_t XXH32(const void* input, size_t length, unsigned int seed);
 
 /*======   Streaming   ======*/
-typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
+typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
-XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
-XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state,
+                                    const XXH32_state_t* src_state);
 
-XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
-XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update(XXH32_state_t* statePtr, const void* input,
+                                          size_t length);
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* statePtr);
 
 /*
- * Streaming functions generate the xxHash of an input provided in multiple segments.
- * Note that, for small input, they are slower than single-call functions, due to state management.
- * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ * Streaming functions generate the xxHash of an input provided in multiple
+ * segments. Note that, for small input, they are slower than single-call
+ * functions, due to state management. For small inputs, prefer `XXH32()` and
+ * `XXH64()`, which are better optimized.
  *
  * XXH state must first be allocated, using XXH*_createState() .
  *
  * Start a new hash by initializing state with a seed, using XXH*_reset().
  *
- * Then, feed the hash state by calling XXH*_update() as many times as necessary.
- * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+ * Then, feed the hash state by calling XXH*_update() as many times as
+ * necessary. The function returns an error code, with 0 meaning OK, and any
+ * other value meaning there is an error.
  *
  * Finally, a hash value can be produced anytime, by using XXH*_digest().
  * This function returns the nn-bits hash as an int or long long.
  *
- * It's still possible to continue inserting input into the hash state after a digest,
- * and generate some new hashes later on, by calling again XXH*_digest().
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate some new hashes later on, by calling again
+ * XXH*_digest().
  *
  * When done, free XXH state space if it was allocated dynamically.
  */
 
 /*======   Canonical representation   ======*/
 
-typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+typedef struct {
+  unsigned char digest[4];
+} XXH32_canonical_t;
 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 
 /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
- * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
- * These functions allow transformation of hash result into and from its canonical format.
- * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+ * The canonical representation uses human-readable write convention, aka
+ * big-endian (large digits first). These functions allow transformation of hash
+ * result into and from its canonical format. This way, hash values can be
+ * written into a file / memory, and remain comparable on different systems and
+ * programs.
  */
 
-
 #ifndef XXH_NO_LONG_LONG
 /*-**********************************************************************
-*  64-bit hash
-************************************************************************/
-#if !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   include <stdint.h>
-    typedef uint64_t XXH64_hash_t;
+ *  64-bit hash
+ ************************************************************************/
+#if !defined(__VMS) &&       \
+    (defined(__cplusplus) || \
+     (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */))
+#include <stdint.h>
+typedef uint64_t XXH64_hash_t;
 #else
-    typedef unsigned long long XXH64_hash_t;
+typedef unsigned long long XXH64_hash_t;
 #endif
 
 /*! XXH64() :
-    Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
-    "seed" can be used to alter the result predictably.
-    This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
+    Calculate the 64-bit hash of sequence of length "len" stored at memory
+   address "input". "seed" can be used to alter the result predictably. This
+   function runs faster on 64-bit systems, but slower on 32-bit systems (see
+   benchmark).
 */
-XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length,
+                                  unsigned long long seed);
 
 /*======   Streaming   ======*/
-typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
-XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state,
+                                    const XXH64_state_t* src_state);
 
-XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
-XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr,
+                                         unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update(XXH64_state_t* statePtr, const void* input,
+                                          size_t length);
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* statePtr);
 
 /*======   Canonical representation   ======*/
-typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+typedef struct {
+  unsigned char digest[8];
+} XXH64_canonical_t;
 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
 
-
-#endif  /* XXH_NO_LONG_LONG */
-
-
+#endif /* XXH_NO_LONG_LONG */
 
 #ifdef XXH_STATIC_LINKING_ONLY
 
 /* ================================================================================================
    This section contains declarations which are not guaranteed to remain stable.
-   They may change in future versions, becoming incompatible with a different version of the library.
-   These declarations should only be used with static linking.
-   Never use them in association with dynamic linking !
-=================================================================================================== */
+   They may change in future versions, becoming incompatible with a different
+   version of the library. These declarations should only be used with static
+   linking. Never use them in association with dynamic linking !
+===================================================================================================
+*/
 
 /* These definitions are only present to allow
  * static allocation of XXH state, on stack or in a struct for example.
  * Never **ever** use members directly. */
 
 struct XXH32_state_s {
-   XXH32_hash_t total_len_32;
-   XXH32_hash_t large_len;
-   XXH32_hash_t v1;
-   XXH32_hash_t v2;
-   XXH32_hash_t v3;
-   XXH32_hash_t v4;
-   XXH32_hash_t mem32[4];
-   XXH32_hash_t memsize;
-   XXH32_hash_t reserved;   /* never read nor write, might be removed in a future version */
-};   /* typedef'd to XXH32_state_t */
-
-#ifndef XXH_NO_LONG_LONG  /* remove 64-bit support */
+  XXH32_hash_t total_len_32;
+  XXH32_hash_t large_len;
+  XXH32_hash_t v1;
+  XXH32_hash_t v2;
+  XXH32_hash_t v3;
+  XXH32_hash_t v4;
+  XXH32_hash_t mem32[4];
+  XXH32_hash_t memsize;
+  XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */
+};                       /* typedef'd to XXH32_state_t */
+
+#ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
 struct XXH64_state_s {
-   XXH64_hash_t total_len;
-   XXH64_hash_t v1;
-   XXH64_hash_t v2;
-   XXH64_hash_t v3;
-   XXH64_hash_t v4;
-   XXH64_hash_t mem64[4];
-   XXH32_hash_t memsize;
-   XXH32_hash_t reserved[2];     /* never read nor write, might be removed in a future version */
-};   /* typedef'd to XXH64_state_t */
-#endif   /* XXH_NO_LONG_LONG */
-
+  XXH64_hash_t total_len;
+  XXH64_hash_t v1;
+  XXH64_hash_t v2;
+  XXH64_hash_t v3;
+  XXH64_hash_t v4;
+  XXH64_hash_t mem64[4];
+  XXH32_hash_t memsize;
+  XXH32_hash_t reserved[2]; /* never read nor write, might be removed in a
+                               future version */
+};                          /* typedef'd to XXH64_state_t */
+#endif                      /* XXH_NO_LONG_LONG */
 
 /*-**********************************************************************
-*  XXH3
-*  New experimental hash
-************************************************************************/
+ *  XXH3
+ *  New experimental hash
+ ************************************************************************/
 #ifndef XXH_NO_LONG_LONG
 
-
 /* ============================================
  * XXH3 is a new hash algorithm,
  * featuring improved speed performance for both small and large inputs.
- * See full speed analysis at : http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
- * In general, expect XXH3 to run about ~2x faster on large inputs,
- * and >3x faster on small ones, though exact differences depend on platform.
+ * See full speed analysis at :
+ * http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
+ * In general, expect XXH3 to run about ~2x faster on large inputs, and >3x
+ * faster on small ones, though exact differences depend on platform.
  *
  * The algorithm is portable, will generate the same hash on all platforms.
  * It benefits greatly from vectorization units, but does not require it.
@@ -348,51 +364,58 @@ struct XXH64_state_s {
  *
  * The XXH3 algorithm is still considered experimental.
  * Produced results can still change between versions.
- * For example, results produced by v0.7.1 are not comparable with results from v0.7.0 .
- * It's nonetheless possible to use XXH3 for ephemeral data (local sessions),
- * but avoid storing values in long-term storage for later re-use.
+ * For example, results produced by v0.7.1 are not comparable with results from
+ * v0.7.0 . It's nonetheless possible to use XXH3 for ephemeral data (local
+ * sessions), but avoid storing values in long-term storage for later re-use.
  *
  * The API supports one-shot hashing, streaming mode, and custom secrets.
  *
- * There are still a number of opened questions that community can influence during the experimental period.
- * I'm trying to list a few of them below, though don't consider this list as complete.
+ * There are still a number of opened questions that community can influence
+ * during the experimental period. I'm trying to list a few of them below,
+ * though don't consider this list as complete.
  *
- * - 128-bits output type : currently defined as a structure of two 64-bits fields.
- *                          That's because 128-bit values do not exist in C standard.
- *                          Note that it means that, at byte level, result is not identical depending on endianess.
- *                          However, at field level, they are identical on all platforms.
- *                          The canonical representation solves the issue of identical byte-level representation across platforms,
- *                          which is necessary for serialization.
- *                          Would there be a better representation for a 128-bit hash result ?
- *                          Are the names of the inner 64-bit fields important ? Should they be changed ?
+ * - 128-bits output type : currently defined as a structure of two 64-bits
+ * fields. That's because 128-bit values do not exist in C standard. Note that
+ * it means that, at byte level, result is not identical depending on
+ * endianness. However, at field level, they are identical on all
+ * platforms. The canonical representation solves the issue of identical
+ * byte-level representation across platforms, which is necessary for
+ * serialization. Would there be a better representation for a 128-bit hash
+ * result ? Are the names of the inner 64-bit fields important ? Should they be
+ * changed ?
  *
- * - Seed type for 128-bits variant : currently, it's a single 64-bit value, like the 64-bit variant.
- *                          It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash.
- *                          But 128-bit seed is more difficult to use, since it requires to pass a structure instead of a scalar value.
- *                          Such a variant could either replace current one, or become an additional one.
- *                          Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`).
- *                          If both 64-bit and 128-bit seeds are possible, which variant should be called XXH128 ?
+ * - Seed type for 128-bits variant : currently, it's a single 64-bit value,
+ * like the 64-bit variant. It could be argued that it's more logical to offer a
+ * 128-bit seed input parameter for a 128-bit hash. But 128-bit seed is more
+ * difficult to use, since it requires to pass a structure instead of a scalar
+ * value. Such a variant could either replace current one, or become an
+ * additional one. Farmhash, for example, offers both variants (the 128-bits
+ * seed variant is called `doubleSeed`). If both 64-bit and 128-bit seeds are
+ * possible, which variant should be called XXH128 ?
  *
- * - Result for len==0 : Currently, the result of hashing a zero-length input is `0`.
- *                          It seems okay as a return value when using all "default" secret and seed (it used to be a request for XXH32/XXH64).
- *                          But is it still fine to return `0` when secret or seed are non-default ?
- *                          Are there use cases which could depend on generating a different hash result for zero-length input when the secret is different ?
+ * - Result for len==0 : Currently, the result of hashing a zero-length input is
+ * `0`. It seems okay as a return value when using all "default" secret and seed
+ * (it used to be a request for XXH32/XXH64). But is it still fine to return `0`
+ * when secret or seed are non-default ? Are there use cases which could depend
+ * on generating a different hash result for zero-length input when the secret
+ * is different ?
  */
 
 #ifdef XXH_NAMESPACE
-#  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
-#  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
-#  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
-
-#  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
-#  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
-#  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
-
-#  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
-#  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
-#  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
-#  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
-#  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
+#define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
+#define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
+#define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
+
+#define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
+#define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
+#define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
+
+#define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
+#define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
+#define XXH3_64bits_reset_withSecret \
+  XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+#define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
+#define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
 #endif
 
 /* XXH3_64bits() :
@@ -401,57 +424,61 @@ struct XXH64_state_s {
 XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
 
 /* XXH3_64bits_withSecret() :
- * It's possible to provide any blob of bytes as a "secret" to generate the hash.
- * This makes it more difficult for an external actor to prepare an intentional collision.
- * The secret *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
- * It should consist of random bytes.
- * Avoid repeating same character, or sequences of bytes,
- * and especially avoid swathes of \0.
+ * It's possible to provide any blob of bytes as a "secret" to generate the
+ * hash. This makes it more difficult for an external actor to prepare an
+ * intentional collision. The secret *must* be large enough (>=
+ * XXH3_SECRET_SIZE_MIN). It should consist of random bytes. Avoid repeating
+ * same character, or sequences of bytes, and especially avoid swathes of \0.
  * Failure to respect these conditions will result in a poor quality hash.
  */
 #define XXH3_SECRET_SIZE_MIN 136
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len,
+                                                   const void* secret, size_t secretSize);
 
 /* XXH3_64bits_withSeed() :
  * This variant generates on the fly a custom secret,
  * based on the default secret, altered using the `seed` value.
  * While this operation is decently fast, note that it's not completely free.
  * note : seed==0 produces same results as XXH3_64bits() */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
-
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len,
+                                                 XXH64_hash_t seed);
 
 /* streaming 64-bit */
 
-#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)   /* C11+ */
-#  include <stdalign.h>
-#  define XXH_ALIGN(n)      alignas(n)
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */
+#include <stdalign.h>
+#define XXH_ALIGN(n) alignas(n)
 #elif defined(__GNUC__)
-#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
+#define XXH_ALIGN(n) __attribute__((aligned(n)))
 #elif defined(_MSC_VER)
-#  define XXH_ALIGN(n)      __declspec(align(n))
+#define XXH_ALIGN(n) __declspec(align(n))
 #else
-#  define XXH_ALIGN(n)   /* disabled */
+#define XXH_ALIGN(n) /* disabled */
 #endif
 
 typedef struct XXH3_state_s XXH3_state_t;
 
-#define XXH3_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */
+#define XXH3_SECRET_DEFAULT_SIZE 192 /* minimum XXH3_SECRET_SIZE_MIN */
 #define XXH3_INTERNALBUFFER_SIZE 256
 struct XXH3_state_s {
-   XXH_ALIGN(64) XXH64_hash_t acc[8];
-   XXH_ALIGN(64) char customSecret[XXH3_SECRET_DEFAULT_SIZE];  /* used to store a custom secret generated from the seed. Makes state larger. Design might change */
-   XXH_ALIGN(64) char buffer[XXH3_INTERNALBUFFER_SIZE];
-   const void* secret;
-   XXH32_hash_t bufferedSize;
-   XXH32_hash_t nbStripesPerBlock;
-   XXH32_hash_t nbStripesSoFar;
-   XXH32_hash_t reserved32;
-   XXH32_hash_t reserved32_2;
-   XXH32_hash_t secretLimit;
-   XXH64_hash_t totalLen;
-   XXH64_hash_t seed;
-   XXH64_hash_t reserved64;
-};   /* typedef'd to XXH3_state_t */
+  XXH_ALIGN(64) XXH64_hash_t acc[8];
+  XXH_ALIGN(64)
+  char customSecret[XXH3_SECRET_DEFAULT_SIZE]; /* used to store a custom secret
+                                                  generated from the seed. Makes
+                                                  state larger. Design might
+                                                  change */
+  XXH_ALIGN(64) char buffer[XXH3_INTERNALBUFFER_SIZE];
+  const void* secret;
+  XXH32_hash_t bufferedSize;
+  XXH32_hash_t nbStripesPerBlock;
+  XXH32_hash_t nbStripesSoFar;
+  XXH32_hash_t reserved32;
+  XXH32_hash_t reserved32_2;
+  XXH32_hash_t secretLimit;
+  XXH64_hash_t totalLen;
+  XXH64_hash_t seed;
+  XXH64_hash_t reserved64;
+}; /* typedef'd to XXH3_state_t */
 
 /* Streaming requires state maintenance.
  * This operation costs memory and cpu.
@@ -460,8 +487,8 @@ struct XXH3_state_s {
 
 XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
-XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
-
+XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state,
+                                   const XXH3_state_t* src_state);
 
 /* XXH3_64bits_reset() :
  * initialize with default parameters.
@@ -470,54 +497,64 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
 /* XXH3_64bits_reset_withSeed() :
  * generate a custom secret from `seed`, and store it into state.
  * digest will be equivalent to `XXH3_64bits_withSeed()`. */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr,
+                                                        XXH64_hash_t seed);
 /* XXH3_64bits_reset_withSecret() :
  * `secret` is referenced, and must outlive the hash streaming session.
  * secretSize must be >= XXH3_SECRET_SIZE_MIN.
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
-
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr,
+                                                          const void* secret,
+                                                          size_t secretSize);
 
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH3_state_t* statePtr, const void* input,
+                                                size_t length);
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest(const XXH3_state_t* statePtr);
 
 /* 128-bit */
 
 #ifdef XXH_NAMESPACE
-#  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
-#  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
-#  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
-#  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
-
-#  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
-#  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
-#  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
-#  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
-#  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
-
-#  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
-#  define XXH128_cmp     XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
-#  define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
-#  define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
+#define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
+#define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
+#define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
+#define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+
+#define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
+#define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
+#define XXH3_128bits_reset_withSecret \
+  XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+#define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
+#define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
+
+#define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
+#define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
+#define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
+#define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
 #endif
 
 typedef struct {
-    XXH64_hash_t low64;
-    XXH64_hash_t high64;
+  XXH64_hash_t low64;
+  XXH64_hash_t high64;
 } XXH128_hash_t;
 
 XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
 XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);  /* == XXH128() */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len,
+                                                   XXH64_hash_t seed); /* == XXH128() */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len,
+                                                     const void* secret,
+                                                     size_t secretSize);
 
 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
-
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr,
+                                                         XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr,
+                                                           const void* secret,
+                                                           size_t secretSize);
 
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH3_state_t* statePtr,
+                                                 const void* input, size_t length);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest(const XXH3_state_t* statePtr);
 
 /* Note : for better performance, following functions should be inlined,
  * using XXH_INLINE_ALL */
@@ -531,29 +568,25 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
  *          =0 if *h128_1 == *h128_2  */
 XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
 
-
 /*======   Canonical representation   ======*/
-typedef struct { unsigned char digest[16]; } XXH128_canonical_t;
+typedef struct {
+  unsigned char digest[16];
+} XXH128_canonical_t;
 XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
 XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
 
-
-#endif  /* XXH_NO_LONG_LONG */
-
+#endif /* XXH_NO_LONG_LONG */
 
 /*-**********************************************************************
-*  XXH_INLINE_ALL
-************************************************************************/
+ *  XXH_INLINE_ALL
+ ************************************************************************/
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
-#  include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */
+#include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
 #endif
 
-
-
 #endif /* XXH_STATIC_LINKING_ONLY */
 
-
-#if defined (__cplusplus)
+#if defined(__cplusplus)
 }
 #endif
 
diff --git a/native-sql-engine/cpp/src/third_party/datetime/date.h b/native-sql-engine/cpp/src/third_party/datetime/date.h
index 9ba144957..c1abbe8cc 100644
--- a/native-sql-engine/cpp/src/third_party/datetime/date.h
+++ b/native-sql-engine/cpp/src/third_party/datetime/date.h
@@ -32,8 +32,8 @@
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
@@ -43,25 +43,25 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
 //
-// Our apologies.  When the previous paragraph was written, lowercase had not yet
-// been invented (that would involve another several millennia of evolution).
-// We did not mean to shout.
+// Our apologies.  When the previous paragraph was written, lowercase had not
+// yet been invented (that would involve another several millennia of
+// evolution). We did not mean to shout.
 
 #ifndef HAS_STRING_VIEW
-#  if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
-#    define HAS_STRING_VIEW 1
-#  else
-#    define HAS_STRING_VIEW 0
-#  endif
+#if __cplusplus >= 201703 || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#define HAS_STRING_VIEW 1
+#else
+#define HAS_STRING_VIEW 0
+#endif
 #endif  // HAS_STRING_VIEW
 
-#include <cassert>
 #include <algorithm>
+#include <cassert>
 #include <cctype>
 #include <chrono>
 #include <climits>
 #if !(__cplusplus >= 201402)
-#  include <cmath>
+#include <cmath>
 #endif
 #include <cstddef>
 #include <cstdint>
@@ -79,98 +79,96 @@
 #include <stdexcept>
 #include <string>
 #if HAS_STRING_VIEW
-# include <string_view>
+#include <string_view>
 #endif
-#include <utility>
 #include <type_traits>
+#include <utility>
 
 #ifdef __GNUC__
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wpedantic"
-# if __GNUC__ < 5
-   // GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers
-#  pragma GCC diagnostic ignored "-Wmissing-field-initializers"
-# endif
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpedantic"
+#if __GNUC__ < 5
+// GCC 4.9 Bug 61489 Wrong warning with -Wmissing-field-initializers
+#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
+#endif
 #endif
 
 #ifdef _MSC_VER
-#   pragma warning(push)
+#pragma warning(push)
 // warning C4127: conditional expression is constant
-#   pragma warning(disable : 4127)
+#pragma warning(disable : 4127)
 #endif
 
-namespace arrow_vendored
-{
-namespace date
-{
+namespace arrow_vendored {
+namespace date {
 
 //---------------+
 // Configuration |
 //---------------+
 
 #ifndef ONLY_C_LOCALE
-#  define ONLY_C_LOCALE 0
+#define ONLY_C_LOCALE 0
 #endif
 
 #if defined(_MSC_VER) && (!defined(__clang__) || (_MSC_VER < 1910))
 // MSVC
-#  ifndef _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING
-#    define _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING
-#  endif
-#  if _MSC_VER < 1910
+#ifndef _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING
+#define _SILENCE_CXX17_UNCAUGHT_EXCEPTION_DEPRECATION_WARNING
+#endif
+#if _MSC_VER < 1910
 //   before VS2017
-#    define CONSTDATA const
-#    define CONSTCD11
-#    define CONSTCD14
-#    define NOEXCEPT _NOEXCEPT
-#  else
+#define CONSTDATA const
+#define CONSTCD11
+#define CONSTCD14
+#define NOEXCEPT _NOEXCEPT
+#else
 //   VS2017 and later
-#    define CONSTDATA constexpr const
-#    define CONSTCD11 constexpr
-#    define CONSTCD14 constexpr
-#    define NOEXCEPT noexcept
-#  endif
+#define CONSTDATA constexpr const
+#define CONSTCD11 constexpr
+#define CONSTCD14 constexpr
+#define NOEXCEPT noexcept
+#endif
 
 #elif defined(__SUNPRO_CC) && __SUNPRO_CC <= 0x5150
 // Oracle Developer Studio 12.6 and earlier
-#  define CONSTDATA constexpr const
-#  define CONSTCD11 constexpr
-#  define CONSTCD14
-#  define NOEXCEPT noexcept
+#define CONSTDATA constexpr const
+#define CONSTCD11 constexpr
+#define CONSTCD14
+#define NOEXCEPT noexcept
 
 #elif __cplusplus >= 201402
 // C++14
-#  define CONSTDATA constexpr const
-#  define CONSTCD11 constexpr
-#  define CONSTCD14 constexpr
-#  define NOEXCEPT noexcept
+#define CONSTDATA constexpr const
+#define CONSTCD11 constexpr
+#define CONSTCD14 constexpr
+#define NOEXCEPT noexcept
 #else
 // C++11
-#  define CONSTDATA constexpr const
-#  define CONSTCD11 constexpr
-#  define CONSTCD14
-#  define NOEXCEPT noexcept
+#define CONSTDATA constexpr const
+#define CONSTCD11 constexpr
+#define CONSTCD14
+#define NOEXCEPT noexcept
 #endif
 
 #ifndef HAS_UNCAUGHT_EXCEPTIONS
-#  if __cplusplus > 201703
-#    define HAS_UNCAUGHT_EXCEPTIONS 1
-#  else
-#    define HAS_UNCAUGHT_EXCEPTIONS 0
-#  endif
+#if __cplusplus > 201703
+#define HAS_UNCAUGHT_EXCEPTIONS 1
+#else
+#define HAS_UNCAUGHT_EXCEPTIONS 0
+#endif
 #endif  // HAS_UNCAUGHT_EXCEPTIONS
 
 #ifndef HAS_VOID_T
-#  if __cplusplus >= 201703
-#    define HAS_VOID_T 1
-#  else
-#    define HAS_VOID_T 0
-#  endif
+#if __cplusplus >= 201703
+#define HAS_VOID_T 1
+#else
+#define HAS_VOID_T 0
+#endif
 #endif  // HAS_VOID_T
 
 // Protect from Oracle sun macro
 #ifdef sun
-#  undef sun
+#undef sun
 #endif
 
 //-----------+
@@ -179,39 +177,39 @@ namespace date
 
 // durations
 
-using days = std::chrono::duration
-    <int, std::ratio_multiply<std::ratio<24>, std::chrono::hours::period>>;
+using days = std::chrono::duration<
+    int, std::ratio_multiply<std::ratio<24>, std::chrono::hours::period>>;
 
-using weeks = std::chrono::duration
-    <int, std::ratio_multiply<std::ratio<7>, days::period>>;
+using weeks =
+    std::chrono::duration<int, std::ratio_multiply<std::ratio<7>, days::period>>;
 
-using years = std::chrono::duration
-    <int, std::ratio_multiply<std::ratio<146097, 400>, days::period>>;
+using years =
+    std::chrono::duration<int,
+                          std::ratio_multiply<std::ratio<146097, 400>, days::period>>;
 
-using months = std::chrono::duration
-    <int, std::ratio_divide<years::period, std::ratio<12>>>;
+using months =
+    std::chrono::duration<int, std::ratio_divide<years::period, std::ratio<12>>>;
 
 // time_point
 
 template <class Duration>
-    using sys_time = std::chrono::time_point<std::chrono::system_clock, Duration>;
+using sys_time = std::chrono::time_point<std::chrono::system_clock, Duration>;
 
-using sys_days    = sys_time<days>;
+using sys_days = sys_time<days>;
 using sys_seconds = sys_time<std::chrono::seconds>;
 
 struct local_t {};
 
 template <class Duration>
-    using local_time = std::chrono::time_point<local_t, Duration>;
+using local_time = std::chrono::time_point<local_t, Duration>;
 
 using local_seconds = local_time<std::chrono::seconds>;
-using local_days    = local_time<days>;
+using local_days = local_time<days>;
 
 // types
 
-struct last_spec
-{
-    explicit last_spec() = default;
+struct last_spec {
+  explicit last_spec() = default;
 };
 
 class day;
@@ -237,307 +235,291 @@ class year_month_weekday_last;
 // date composition operators
 
 CONSTCD11 year_month operator/(const year& y, const month& m) NOEXCEPT;
-CONSTCD11 year_month operator/(const year& y, int          m) NOEXCEPT;
+CONSTCD11 year_month operator/(const year& y, int m) NOEXCEPT;
 
 CONSTCD11 month_day operator/(const day& d, const month& m) NOEXCEPT;
-CONSTCD11 month_day operator/(const day& d, int          m) NOEXCEPT;
+CONSTCD11 month_day operator/(const day& d, int m) NOEXCEPT;
 CONSTCD11 month_day operator/(const month& m, const day& d) NOEXCEPT;
-CONSTCD11 month_day operator/(const month& m, int        d) NOEXCEPT;
-CONSTCD11 month_day operator/(int          m, const day& d) NOEXCEPT;
+CONSTCD11 month_day operator/(const month& m, int d) NOEXCEPT;
+CONSTCD11 month_day operator/(int m, const day& d) NOEXCEPT;
 
 CONSTCD11 month_day_last operator/(const month& m, last_spec) NOEXCEPT;
-CONSTCD11 month_day_last operator/(int          m, last_spec) NOEXCEPT;
+CONSTCD11 month_day_last operator/(int m, last_spec) NOEXCEPT;
 CONSTCD11 month_day_last operator/(last_spec, const month& m) NOEXCEPT;
-CONSTCD11 month_day_last operator/(last_spec, int          m) NOEXCEPT;
+CONSTCD11 month_day_last operator/(last_spec, int m) NOEXCEPT;
 
 CONSTCD11 month_weekday operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT;
-CONSTCD11 month_weekday operator/(int          m, const weekday_indexed& wdi) NOEXCEPT;
+CONSTCD11 month_weekday operator/(int m, const weekday_indexed& wdi) NOEXCEPT;
 CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT;
-CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, int          m) NOEXCEPT;
+CONSTCD11 month_weekday operator/(const weekday_indexed& wdi, int m) NOEXCEPT;
 
 CONSTCD11 month_weekday_last operator/(const month& m, const weekday_last& wdl) NOEXCEPT;
-CONSTCD11 month_weekday_last operator/(int          m, const weekday_last& wdl) NOEXCEPT;
+CONSTCD11 month_weekday_last operator/(int m, const weekday_last& wdl) NOEXCEPT;
 CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, const month& m) NOEXCEPT;
-CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, int          m) NOEXCEPT;
+CONSTCD11 month_weekday_last operator/(const weekday_last& wdl, int m) NOEXCEPT;
 
 CONSTCD11 year_month_day operator/(const year_month& ym, const day& d) NOEXCEPT;
-CONSTCD11 year_month_day operator/(const year_month& ym, int        d) NOEXCEPT;
+CONSTCD11 year_month_day operator/(const year_month& ym, int d) NOEXCEPT;
 CONSTCD11 year_month_day operator/(const year& y, const month_day& md) NOEXCEPT;
-CONSTCD11 year_month_day operator/(int         y, const month_day& md) NOEXCEPT;
+CONSTCD11 year_month_day operator/(int y, const month_day& md) NOEXCEPT;
 CONSTCD11 year_month_day operator/(const month_day& md, const year& y) NOEXCEPT;
-CONSTCD11 year_month_day operator/(const month_day& md, int         y) NOEXCEPT;
+CONSTCD11 year_month_day operator/(const month_day& md, int y) NOEXCEPT;
 
 CONSTCD11
-    year_month_day_last operator/(const year_month& ym,   last_spec) NOEXCEPT;
+year_month_day_last operator/(const year_month& ym, last_spec) NOEXCEPT;
 CONSTCD11
-    year_month_day_last operator/(const year& y, const month_day_last& mdl) NOEXCEPT;
+year_month_day_last operator/(const year& y, const month_day_last& mdl) NOEXCEPT;
 CONSTCD11
-    year_month_day_last operator/(int         y, const month_day_last& mdl) NOEXCEPT;
+year_month_day_last operator/(int y, const month_day_last& mdl) NOEXCEPT;
 CONSTCD11
-    year_month_day_last operator/(const month_day_last& mdl, const year& y) NOEXCEPT;
+year_month_day_last operator/(const month_day_last& mdl, const year& y) NOEXCEPT;
 CONSTCD11
-    year_month_day_last operator/(const month_day_last& mdl, int         y) NOEXCEPT;
+year_month_day_last operator/(const month_day_last& mdl, int y) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday
-operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT;
+year_month_weekday operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday
-operator/(const year&        y, const month_weekday&   mwd) NOEXCEPT;
+year_month_weekday operator/(const year& y, const month_weekday& mwd) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday
-operator/(int                y, const month_weekday&   mwd) NOEXCEPT;
+year_month_weekday operator/(int y, const month_weekday& mwd) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday
-operator/(const month_weekday& mwd, const year&          y) NOEXCEPT;
+year_month_weekday operator/(const month_weekday& mwd, const year& y) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday
-operator/(const month_weekday& mwd, int                  y) NOEXCEPT;
+year_month_weekday operator/(const month_weekday& mwd, int y) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday_last
-operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT;
+year_month_weekday_last operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday_last
-operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT;
+year_month_weekday_last operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday_last
-operator/(int         y, const month_weekday_last& mwdl) NOEXCEPT;
+year_month_weekday_last operator/(int y, const month_weekday_last& mwdl) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday_last
-operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT;
+year_month_weekday_last operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday_last
-operator/(const month_weekday_last& mwdl, int         y) NOEXCEPT;
+year_month_weekday_last operator/(const month_weekday_last& mwdl, int y) NOEXCEPT;
 
 // Detailed interface
 
 // day
 
-class day
-{
-    unsigned char d_;
+class day {
+  unsigned char d_;
 
-public:
-    day() = default;
-    explicit CONSTCD11 day(unsigned d) NOEXCEPT;
+ public:
+  day() = default;
+  explicit CONSTCD11 day(unsigned d) NOEXCEPT;
 
-    CONSTCD14 day& operator++()    NOEXCEPT;
-    CONSTCD14 day  operator++(int) NOEXCEPT;
-    CONSTCD14 day& operator--()    NOEXCEPT;
-    CONSTCD14 day  operator--(int) NOEXCEPT;
+  CONSTCD14 day& operator++() NOEXCEPT;
+  CONSTCD14 day operator++(int) NOEXCEPT;
+  CONSTCD14 day& operator--() NOEXCEPT;
+  CONSTCD14 day operator--(int) NOEXCEPT;
 
-    CONSTCD14 day& operator+=(const days& d) NOEXCEPT;
-    CONSTCD14 day& operator-=(const days& d) NOEXCEPT;
+  CONSTCD14 day& operator+=(const days& d) NOEXCEPT;
+  CONSTCD14 day& operator-=(const days& d) NOEXCEPT;
 
-    CONSTCD11 explicit operator unsigned() const NOEXCEPT;
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 explicit operator unsigned() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const day& x, const day& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const day& x, const day& y) NOEXCEPT;
-CONSTCD11 bool operator< (const day& x, const day& y) NOEXCEPT;
-CONSTCD11 bool operator> (const day& x, const day& y) NOEXCEPT;
+CONSTCD11 bool operator<(const day& x, const day& y) NOEXCEPT;
+CONSTCD11 bool operator>(const day& x, const day& y) NOEXCEPT;
 CONSTCD11 bool operator<=(const day& x, const day& y) NOEXCEPT;
 CONSTCD11 bool operator>=(const day& x, const day& y) NOEXCEPT;
 
-CONSTCD11 day  operator+(const day&  x, const days& y) NOEXCEPT;
-CONSTCD11 day  operator+(const days& x, const day&  y) NOEXCEPT;
-CONSTCD11 day  operator-(const day&  x, const days& y) NOEXCEPT;
-CONSTCD11 days operator-(const day&  x, const day&  y) NOEXCEPT;
+CONSTCD11 day operator+(const day& x, const days& y) NOEXCEPT;
+CONSTCD11 day operator+(const days& x, const day& y) NOEXCEPT;
+CONSTCD11 day operator-(const day& x, const days& y) NOEXCEPT;
+CONSTCD11 days operator-(const day& x, const day& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const day& d);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const day& d);
 
 // month
 
-class month
-{
-    unsigned char m_;
+class month {
+  unsigned char m_;
 
-public:
-    month() = default;
-    explicit CONSTCD11 month(unsigned m) NOEXCEPT;
+ public:
+  month() = default;
+  explicit CONSTCD11 month(unsigned m) NOEXCEPT;
 
-    CONSTCD14 month& operator++()    NOEXCEPT;
-    CONSTCD14 month  operator++(int) NOEXCEPT;
-    CONSTCD14 month& operator--()    NOEXCEPT;
-    CONSTCD14 month  operator--(int) NOEXCEPT;
+  CONSTCD14 month& operator++() NOEXCEPT;
+  CONSTCD14 month operator++(int) NOEXCEPT;
+  CONSTCD14 month& operator--() NOEXCEPT;
+  CONSTCD14 month operator--(int) NOEXCEPT;
 
-    CONSTCD14 month& operator+=(const months& m) NOEXCEPT;
-    CONSTCD14 month& operator-=(const months& m) NOEXCEPT;
+  CONSTCD14 month& operator+=(const months& m) NOEXCEPT;
+  CONSTCD14 month& operator-=(const months& m) NOEXCEPT;
 
-    CONSTCD11 explicit operator unsigned() const NOEXCEPT;
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 explicit operator unsigned() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const month& x, const month& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const month& x, const month& y) NOEXCEPT;
-CONSTCD11 bool operator< (const month& x, const month& y) NOEXCEPT;
-CONSTCD11 bool operator> (const month& x, const month& y) NOEXCEPT;
+CONSTCD11 bool operator<(const month& x, const month& y) NOEXCEPT;
+CONSTCD11 bool operator>(const month& x, const month& y) NOEXCEPT;
 CONSTCD11 bool operator<=(const month& x, const month& y) NOEXCEPT;
 CONSTCD11 bool operator>=(const month& x, const month& y) NOEXCEPT;
 
-CONSTCD14 month  operator+(const month&  x, const months& y) NOEXCEPT;
-CONSTCD14 month  operator+(const months& x,  const month& y) NOEXCEPT;
-CONSTCD14 month  operator-(const month&  x, const months& y) NOEXCEPT;
-CONSTCD14 months operator-(const month&  x,  const month& y) NOEXCEPT;
+CONSTCD14 month operator+(const month& x, const months& y) NOEXCEPT;
+CONSTCD14 month operator+(const months& x, const month& y) NOEXCEPT;
+CONSTCD14 month operator-(const month& x, const months& y) NOEXCEPT;
+CONSTCD14 months operator-(const month& x, const month& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month& m);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const month& m);
 
 // year
 
-class year
-{
-    short y_;
+class year {
+  short y_;
 
-public:
-    year() = default;
-    explicit CONSTCD11 year(int y) NOEXCEPT;
+ public:
+  year() = default;
+  explicit CONSTCD11 year(int y) NOEXCEPT;
 
-    CONSTCD14 year& operator++()    NOEXCEPT;
-    CONSTCD14 year  operator++(int) NOEXCEPT;
-    CONSTCD14 year& operator--()    NOEXCEPT;
-    CONSTCD14 year  operator--(int) NOEXCEPT;
+  CONSTCD14 year& operator++() NOEXCEPT;
+  CONSTCD14 year operator++(int) NOEXCEPT;
+  CONSTCD14 year& operator--() NOEXCEPT;
+  CONSTCD14 year operator--(int) NOEXCEPT;
 
-    CONSTCD14 year& operator+=(const years& y) NOEXCEPT;
-    CONSTCD14 year& operator-=(const years& y) NOEXCEPT;
+  CONSTCD14 year& operator+=(const years& y) NOEXCEPT;
+  CONSTCD14 year& operator-=(const years& y) NOEXCEPT;
 
-    CONSTCD11 year operator-() const NOEXCEPT;
-    CONSTCD11 year operator+() const NOEXCEPT;
+  CONSTCD11 year operator-() const NOEXCEPT;
+  CONSTCD11 year operator+() const NOEXCEPT;
 
-    CONSTCD11 bool is_leap() const NOEXCEPT;
+  CONSTCD11 bool is_leap() const NOEXCEPT;
 
-    CONSTCD11 explicit operator int() const NOEXCEPT;
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 explicit operator int() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 
-    static CONSTCD11 year min() NOEXCEPT;
-    static CONSTCD11 year max() NOEXCEPT;
+  static CONSTCD11 year min() NOEXCEPT;
+  static CONSTCD11 year max() NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const year& x, const year& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const year& x, const year& y) NOEXCEPT;
-CONSTCD11 bool operator< (const year& x, const year& y) NOEXCEPT;
-CONSTCD11 bool operator> (const year& x, const year& y) NOEXCEPT;
+CONSTCD11 bool operator<(const year& x, const year& y) NOEXCEPT;
+CONSTCD11 bool operator>(const year& x, const year& y) NOEXCEPT;
 CONSTCD11 bool operator<=(const year& x, const year& y) NOEXCEPT;
 CONSTCD11 bool operator>=(const year& x, const year& y) NOEXCEPT;
 
-CONSTCD11 year  operator+(const year&  x, const years& y) NOEXCEPT;
-CONSTCD11 year  operator+(const years& x, const year&  y) NOEXCEPT;
-CONSTCD11 year  operator-(const year&  x, const years& y) NOEXCEPT;
-CONSTCD11 years operator-(const year&  x, const year&  y) NOEXCEPT;
+CONSTCD11 year operator+(const year& x, const years& y) NOEXCEPT;
+CONSTCD11 year operator+(const years& x, const year& y) NOEXCEPT;
+CONSTCD11 year operator-(const year& x, const years& y) NOEXCEPT;
+CONSTCD11 years operator-(const year& x, const year& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year& y);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const year& y);
 
 // weekday
 
-class weekday
-{
-    unsigned char wd_;
-public:
-    weekday() = default;
-    explicit CONSTCD11 weekday(unsigned wd) NOEXCEPT;
-    CONSTCD14 weekday(const sys_days& dp) NOEXCEPT;
-    CONSTCD14 explicit weekday(const local_days& dp) NOEXCEPT;
-
-    CONSTCD14 weekday& operator++()    NOEXCEPT;
-    CONSTCD14 weekday  operator++(int) NOEXCEPT;
-    CONSTCD14 weekday& operator--()    NOEXCEPT;
-    CONSTCD14 weekday  operator--(int) NOEXCEPT;
-
-    CONSTCD14 weekday& operator+=(const days& d) NOEXCEPT;
-    CONSTCD14 weekday& operator-=(const days& d) NOEXCEPT;
-
-    CONSTCD11 bool ok() const NOEXCEPT;
-
-    CONSTCD11 unsigned c_encoding() const NOEXCEPT;
-    CONSTCD11 unsigned iso_encoding() const NOEXCEPT;
-
-    CONSTCD11 weekday_indexed operator[](unsigned index) const NOEXCEPT;
-    CONSTCD11 weekday_last    operator[](last_spec)      const NOEXCEPT;
-
-private:
-    static CONSTCD14 unsigned char weekday_from_days(int z) NOEXCEPT;
-
-    friend CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT;
-    friend CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT;
-    friend CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT;
-    template<class CharT, class Traits>
-        friend std::basic_ostream<CharT, Traits>&
-            operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd);
-    friend class weekday_indexed;
+class weekday {
+  unsigned char wd_;
+
+ public:
+  weekday() = default;
+  explicit CONSTCD11 weekday(unsigned wd) NOEXCEPT;
+  CONSTCD14 weekday(const sys_days& dp) NOEXCEPT;
+  CONSTCD14 explicit weekday(const local_days& dp) NOEXCEPT;
+
+  CONSTCD14 weekday& operator++() NOEXCEPT;
+  CONSTCD14 weekday operator++(int) NOEXCEPT;
+  CONSTCD14 weekday& operator--() NOEXCEPT;
+  CONSTCD14 weekday operator--(int) NOEXCEPT;
+
+  CONSTCD14 weekday& operator+=(const days& d) NOEXCEPT;
+  CONSTCD14 weekday& operator-=(const days& d) NOEXCEPT;
+
+  CONSTCD11 bool ok() const NOEXCEPT;
+
+  CONSTCD11 unsigned c_encoding() const NOEXCEPT;
+  CONSTCD11 unsigned iso_encoding() const NOEXCEPT;
+
+  CONSTCD11 weekday_indexed operator[](unsigned index) const NOEXCEPT;
+  CONSTCD11 weekday_last operator[](last_spec) const NOEXCEPT;
+
+ private:
+  static CONSTCD14 unsigned char weekday_from_days(int z) NOEXCEPT;
+
+  friend CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT;
+  friend CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT;
+  friend CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT;
+  template <class CharT, class Traits>
+  friend std::basic_ostream<CharT, Traits>& operator<<(
+      std::basic_ostream<CharT, Traits>& os, const weekday& wd);
+  friend class weekday_indexed;
 };
 
 CONSTCD11 bool operator==(const weekday& x, const weekday& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const weekday& x, const weekday& y) NOEXCEPT;
 
-CONSTCD14 weekday operator+(const weekday& x, const days&    y) NOEXCEPT;
-CONSTCD14 weekday operator+(const days&    x, const weekday& y) NOEXCEPT;
-CONSTCD14 weekday operator-(const weekday& x, const days&    y) NOEXCEPT;
-CONSTCD14 days    operator-(const weekday& x, const weekday& y) NOEXCEPT;
+CONSTCD14 weekday operator+(const weekday& x, const days& y) NOEXCEPT;
+CONSTCD14 weekday operator+(const days& x, const weekday& y) NOEXCEPT;
+CONSTCD14 weekday operator-(const weekday& x, const days& y) NOEXCEPT;
+CONSTCD14 days operator-(const weekday& x, const weekday& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const weekday& wd);
 
 // weekday_indexed
 
-class weekday_indexed
-{
-    unsigned char wd_    : 4;
-    unsigned char index_ : 4;
+class weekday_indexed {
+  unsigned char wd_ : 4;
+  unsigned char index_ : 4;
 
-public:
-    weekday_indexed() = default;
-    CONSTCD11 weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT;
+ public:
+  weekday_indexed() = default;
+  CONSTCD11 weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT;
 
-    CONSTCD11 date::weekday weekday() const NOEXCEPT;
-    CONSTCD11 unsigned index() const NOEXCEPT;
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 date::weekday weekday() const NOEXCEPT;
+  CONSTCD11 unsigned index() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_indexed& wdi);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const weekday_indexed& wdi);
 
 // weekday_last
 
-class weekday_last
-{
-    date::weekday wd_;
+class weekday_last {
+  date::weekday wd_;
 
-public:
-    explicit CONSTCD11 weekday_last(const date::weekday& wd) NOEXCEPT;
+ public:
+  explicit CONSTCD11 weekday_last(const date::weekday& wd) NOEXCEPT;
 
-    CONSTCD11 date::weekday weekday() const NOEXCEPT;
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 date::weekday weekday() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_last& wdl);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const weekday_last& wdl);
 
-namespace detail
-{
+namespace detail {
 
 struct unspecified_month_disambiguator {};
 
@@ -545,40 +527,39 @@ struct unspecified_month_disambiguator {};
 
 // year_month
 
-class year_month
-{
-    date::year  y_;
-    date::month m_;
+class year_month {
+  date::year y_;
+  date::month m_;
 
-public:
-    year_month() = default;
-    CONSTCD11 year_month(const date::year& y, const date::month& m) NOEXCEPT;
+ public:
+  year_month() = default;
+  CONSTCD11 year_month(const date::year& y, const date::month& m) NOEXCEPT;
 
-    CONSTCD11 date::year  year()  const NOEXCEPT;
-    CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 date::year year() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
 
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month& operator+=(const months& dm) NOEXCEPT;
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month& operator-=(const months& dm) NOEXCEPT;
-    CONSTCD14 year_month& operator+=(const years& dy) NOEXCEPT;
-    CONSTCD14 year_month& operator-=(const years& dy) NOEXCEPT;
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month& operator+=(const months& dm) NOEXCEPT;
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month& operator-=(const months& dm) NOEXCEPT;
+  CONSTCD14 year_month& operator+=(const years& dy) NOEXCEPT;
+  CONSTCD14 year_month& operator-=(const years& dy) NOEXCEPT;
 
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const year_month& x, const year_month& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const year_month& x, const year_month& y) NOEXCEPT;
-CONSTCD11 bool operator< (const year_month& x, const year_month& y) NOEXCEPT;
-CONSTCD11 bool operator> (const year_month& x, const year_month& y) NOEXCEPT;
+CONSTCD11 bool operator<(const year_month& x, const year_month& y) NOEXCEPT;
+CONSTCD11 bool operator>(const year_month& x, const year_month& y) NOEXCEPT;
 CONSTCD11 bool operator<=(const year_month& x, const year_month& y) NOEXCEPT;
 CONSTCD11 bool operator>=(const year_month& x, const year_month& y) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
+template <class = detail::unspecified_month_disambiguator>
 CONSTCD14 year_month operator+(const year_month& ym, const months& dm) NOEXCEPT;
-template<class = detail::unspecified_month_disambiguator>
+template <class = detail::unspecified_month_disambiguator>
 CONSTCD14 year_month operator+(const months& dm, const year_month& ym) NOEXCEPT;
-template<class = detail::unspecified_month_disambiguator>
+template <class = detail::unspecified_month_disambiguator>
 CONSTCD14 year_month operator-(const year_month& ym, const months& dm) NOEXCEPT;
 
 CONSTCD11 months operator-(const year_month& x, const year_month& y) NOEXCEPT;
@@ -586,394 +567,371 @@ CONSTCD11 year_month operator+(const year_month& ym, const years& dy) NOEXCEPT;
 CONSTCD11 year_month operator+(const years& dy, const year_month& ym) NOEXCEPT;
 CONSTCD11 year_month operator-(const year_month& ym, const years& dy) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month& ym);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const year_month& ym);
 
 // month_day
 
-class month_day
-{
-    date::month m_;
-    date::day   d_;
+class month_day {
+  date::month m_;
+  date::day d_;
 
-public:
-    month_day() = default;
-    CONSTCD11 month_day(const date::month& m, const date::day& d) NOEXCEPT;
+ public:
+  month_day() = default;
+  CONSTCD11 month_day(const date::month& m, const date::day& d) NOEXCEPT;
 
-    CONSTCD11 date::month month() const NOEXCEPT;
-    CONSTCD11 date::day   day() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 date::day day() const NOEXCEPT;
 
-    CONSTCD14 bool ok() const NOEXCEPT;
+  CONSTCD14 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const month_day& x, const month_day& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const month_day& x, const month_day& y) NOEXCEPT;
-CONSTCD11 bool operator< (const month_day& x, const month_day& y) NOEXCEPT;
-CONSTCD11 bool operator> (const month_day& x, const month_day& y) NOEXCEPT;
+CONSTCD11 bool operator<(const month_day& x, const month_day& y) NOEXCEPT;
+CONSTCD11 bool operator>(const month_day& x, const month_day& y) NOEXCEPT;
 CONSTCD11 bool operator<=(const month_day& x, const month_day& y) NOEXCEPT;
 CONSTCD11 bool operator>=(const month_day& x, const month_day& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_day& md);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const month_day& md);
 
 // month_day_last
 
-class month_day_last
-{
-    date::month m_;
+class month_day_last {
+  date::month m_;
 
-public:
-    CONSTCD11 explicit month_day_last(const date::month& m) NOEXCEPT;
+ public:
+  CONSTCD11 explicit month_day_last(const date::month& m) NOEXCEPT;
 
-    CONSTCD11 date::month month() const NOEXCEPT;
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
-CONSTCD11 bool operator< (const month_day_last& x, const month_day_last& y) NOEXCEPT;
-CONSTCD11 bool operator> (const month_day_last& x, const month_day_last& y) NOEXCEPT;
+CONSTCD11 bool operator<(const month_day_last& x, const month_day_last& y) NOEXCEPT;
+CONSTCD11 bool operator>(const month_day_last& x, const month_day_last& y) NOEXCEPT;
 CONSTCD11 bool operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
 CONSTCD11 bool operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_day_last& mdl);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const month_day_last& mdl);
 
 // month_weekday
 
-class month_weekday
-{
-    date::month           m_;
-    date::weekday_indexed wdi_;
-public:
-    CONSTCD11 month_weekday(const date::month& m,
-                            const date::weekday_indexed& wdi) NOEXCEPT;
+class month_weekday {
+  date::month m_;
+  date::weekday_indexed wdi_;
 
-    CONSTCD11 date::month           month()           const NOEXCEPT;
-    CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT;
+ public:
+  CONSTCD11 month_weekday(const date::month& m,
+                          const date::weekday_indexed& wdi) NOEXCEPT;
 
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT;
+
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday& mwd);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const month_weekday& mwd);
 
 // month_weekday_last
 
-class month_weekday_last
-{
-    date::month        m_;
-    date::weekday_last wdl_;
+class month_weekday_last {
+  date::month m_;
+  date::weekday_last wdl_;
 
-public:
-    CONSTCD11 month_weekday_last(const date::month& m,
-                                 const date::weekday_last& wd) NOEXCEPT;
+ public:
+  CONSTCD11 month_weekday_last(const date::month& m,
+                               const date::weekday_last& wd) NOEXCEPT;
 
-    CONSTCD11 date::month        month()        const NOEXCEPT;
-    CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT;
 
-    CONSTCD11 bool ok() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11
-    bool operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT;
+bool operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT;
 CONSTCD11
-    bool operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT;
+bool operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday_last& mwdl);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const month_weekday_last& mwdl);
 
 // class year_month_day
 
-class year_month_day
-{
-    date::year  y_;
-    date::month m_;
-    date::day   d_;
-
-public:
-    year_month_day() = default;
-    CONSTCD11 year_month_day(const date::year& y, const date::month& m,
-                             const date::day& d) NOEXCEPT;
-    CONSTCD14 year_month_day(const year_month_day_last& ymdl) NOEXCEPT;
-
-    CONSTCD14 year_month_day(sys_days dp) NOEXCEPT;
-    CONSTCD14 explicit year_month_day(local_days dp) NOEXCEPT;
-
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month_day& operator+=(const months& m) NOEXCEPT;
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month_day& operator-=(const months& m) NOEXCEPT;
-    CONSTCD14 year_month_day& operator+=(const years& y)  NOEXCEPT;
-    CONSTCD14 year_month_day& operator-=(const years& y)  NOEXCEPT;
-
-    CONSTCD11 date::year  year()  const NOEXCEPT;
-    CONSTCD11 date::month month() const NOEXCEPT;
-    CONSTCD11 date::day   day()   const NOEXCEPT;
-
-    CONSTCD14 operator sys_days() const NOEXCEPT;
-    CONSTCD14 explicit operator local_days() const NOEXCEPT;
-    CONSTCD14 bool ok() const NOEXCEPT;
-
-private:
-    static CONSTCD14 year_month_day from_days(days dp) NOEXCEPT;
-    CONSTCD14 days to_days() const NOEXCEPT;
+class year_month_day {
+  date::year y_;
+  date::month m_;
+  date::day d_;
+
+ public:
+  year_month_day() = default;
+  CONSTCD11 year_month_day(const date::year& y, const date::month& m,
+                           const date::day& d) NOEXCEPT;
+  CONSTCD14 year_month_day(const year_month_day_last& ymdl) NOEXCEPT;
+
+  CONSTCD14 year_month_day(sys_days dp) NOEXCEPT;
+  CONSTCD14 explicit year_month_day(local_days dp) NOEXCEPT;
+
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month_day& operator+=(const months& m) NOEXCEPT;
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month_day& operator-=(const months& m) NOEXCEPT;
+  CONSTCD14 year_month_day& operator+=(const years& y) NOEXCEPT;
+  CONSTCD14 year_month_day& operator-=(const years& y) NOEXCEPT;
+
+  CONSTCD11 date::year year() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 date::day day() const NOEXCEPT;
+
+  CONSTCD14 operator sys_days() const NOEXCEPT;
+  CONSTCD14 explicit operator local_days() const NOEXCEPT;
+  CONSTCD14 bool ok() const NOEXCEPT;
+
+ private:
+  static CONSTCD14 year_month_day from_days(days dp) NOEXCEPT;
+  CONSTCD14 days to_days() const NOEXCEPT;
 };
 
 CONSTCD11 bool operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT;
 CONSTCD11 bool operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
-CONSTCD11 bool operator< (const year_month_day& x, const year_month_day& y) NOEXCEPT;
-CONSTCD11 bool operator> (const year_month_day& x, const year_month_day& y) NOEXCEPT;
+CONSTCD11 bool operator<(const year_month_day& x, const year_month_day& y) NOEXCEPT;
+CONSTCD11 bool operator>(const year_month_day& x, const year_month_day& y) NOEXCEPT;
 CONSTCD11 bool operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
 CONSTCD11 bool operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
+template <class = detail::unspecified_month_disambiguator>
 CONSTCD14 year_month_day operator+(const year_month_day& ymd, const months& dm) NOEXCEPT;
-template<class = detail::unspecified_month_disambiguator>
+template <class = detail::unspecified_month_disambiguator>
 CONSTCD14 year_month_day operator+(const months& dm, const year_month_day& ymd) NOEXCEPT;
-template<class = detail::unspecified_month_disambiguator>
+template <class = detail::unspecified_month_disambiguator>
 CONSTCD14 year_month_day operator-(const year_month_day& ymd, const months& dm) NOEXCEPT;
-CONSTCD11 year_month_day operator+(const year_month_day& ymd, const years& dy)  NOEXCEPT;
-CONSTCD11 year_month_day operator+(const years& dy, const year_month_day& ymd)  NOEXCEPT;
-CONSTCD11 year_month_day operator-(const year_month_day& ymd, const years& dy)  NOEXCEPT;
+CONSTCD11 year_month_day operator+(const year_month_day& ymd, const years& dy) NOEXCEPT;
+CONSTCD11 year_month_day operator+(const years& dy, const year_month_day& ymd) NOEXCEPT;
+CONSTCD11 year_month_day operator-(const year_month_day& ymd, const years& dy) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day& ymd);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const year_month_day& ymd);
 
 // year_month_day_last
 
-class year_month_day_last
-{
-    date::year           y_;
-    date::month_day_last mdl_;
-
-public:
-    CONSTCD11 year_month_day_last(const date::year& y,
-                                  const date::month_day_last& mdl) NOEXCEPT;
-
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month_day_last& operator+=(const months& m) NOEXCEPT;
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month_day_last& operator-=(const months& m) NOEXCEPT;
-    CONSTCD14 year_month_day_last& operator+=(const years& y)  NOEXCEPT;
-    CONSTCD14 year_month_day_last& operator-=(const years& y)  NOEXCEPT;
-
-    CONSTCD11 date::year           year()           const NOEXCEPT;
-    CONSTCD11 date::month          month()          const NOEXCEPT;
-    CONSTCD11 date::month_day_last month_day_last() const NOEXCEPT;
-    CONSTCD14 date::day            day()            const NOEXCEPT;
-
-    CONSTCD14 operator sys_days() const NOEXCEPT;
-    CONSTCD14 explicit operator local_days() const NOEXCEPT;
-    CONSTCD11 bool ok() const NOEXCEPT;
+class year_month_day_last {
+  date::year y_;
+  date::month_day_last mdl_;
+
+ public:
+  CONSTCD11 year_month_day_last(const date::year& y,
+                                const date::month_day_last& mdl) NOEXCEPT;
+
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month_day_last& operator+=(const months& m) NOEXCEPT;
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month_day_last& operator-=(const months& m) NOEXCEPT;
+  CONSTCD14 year_month_day_last& operator+=(const years& y) NOEXCEPT;
+  CONSTCD14 year_month_day_last& operator-=(const years& y) NOEXCEPT;
+
+  CONSTCD11 date::year year() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 date::month_day_last month_day_last() const NOEXCEPT;
+  CONSTCD14 date::day day() const NOEXCEPT;
+
+  CONSTCD14 operator sys_days() const NOEXCEPT;
+  CONSTCD14 explicit operator local_days() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
 };
 
 CONSTCD11
-    bool operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+bool operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
 CONSTCD11
-    bool operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+bool operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
 CONSTCD11
-    bool operator< (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+bool operator<(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
 CONSTCD11
-    bool operator> (const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+bool operator>(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
 CONSTCD11
-    bool operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+bool operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
 CONSTCD11
-    bool operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
+bool operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_day_last
-operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT;
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_day_last operator+(const year_month_day_last& ymdl,
+                                        const months& dm) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_day_last
-operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT;
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_day_last operator+(const months& dm,
+                                        const year_month_day_last& ymdl) NOEXCEPT;
 
 CONSTCD11
-year_month_day_last
-operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT;
+year_month_day_last operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT;
 
 CONSTCD11
-year_month_day_last
-operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT;
+year_month_day_last operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_day_last
-operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT;
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_day_last operator-(const year_month_day_last& ymdl,
+                                        const months& dm) NOEXCEPT;
 
 CONSTCD11
-year_month_day_last
-operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT;
+year_month_day_last operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day_last& ymdl);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const year_month_day_last& ymdl);
 
 // year_month_weekday
 
-class year_month_weekday
-{
-    date::year            y_;
-    date::month           m_;
-    date::weekday_indexed wdi_;
-
-public:
-    year_month_weekday() = default;
-    CONSTCD11 year_month_weekday(const date::year& y, const date::month& m,
-                                   const date::weekday_indexed& wdi) NOEXCEPT;
-    CONSTCD14 year_month_weekday(const sys_days& dp) NOEXCEPT;
-    CONSTCD14 explicit year_month_weekday(const local_days& dp) NOEXCEPT;
-
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month_weekday& operator+=(const months& m) NOEXCEPT;
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month_weekday& operator-=(const months& m) NOEXCEPT;
-    CONSTCD14 year_month_weekday& operator+=(const years& y)  NOEXCEPT;
-    CONSTCD14 year_month_weekday& operator-=(const years& y)  NOEXCEPT;
-
-    CONSTCD11 date::year year() const NOEXCEPT;
-    CONSTCD11 date::month month() const NOEXCEPT;
-    CONSTCD11 date::weekday weekday() const NOEXCEPT;
-    CONSTCD11 unsigned index() const NOEXCEPT;
-    CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT;
-
-    CONSTCD14 operator sys_days() const NOEXCEPT;
-    CONSTCD14 explicit operator local_days() const NOEXCEPT;
-    CONSTCD14 bool ok() const NOEXCEPT;
-
-private:
-    static CONSTCD14 year_month_weekday from_days(days dp) NOEXCEPT;
-    CONSTCD14 days to_days() const NOEXCEPT;
+class year_month_weekday {
+  date::year y_;
+  date::month m_;
+  date::weekday_indexed wdi_;
+
+ public:
+  year_month_weekday() = default;
+  CONSTCD11 year_month_weekday(const date::year& y, const date::month& m,
+                               const date::weekday_indexed& wdi) NOEXCEPT;
+  CONSTCD14 year_month_weekday(const sys_days& dp) NOEXCEPT;
+  CONSTCD14 explicit year_month_weekday(const local_days& dp) NOEXCEPT;
+
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month_weekday& operator+=(const months& m) NOEXCEPT;
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month_weekday& operator-=(const months& m) NOEXCEPT;
+  CONSTCD14 year_month_weekday& operator+=(const years& y) NOEXCEPT;
+  CONSTCD14 year_month_weekday& operator-=(const years& y) NOEXCEPT;
+
+  CONSTCD11 date::year year() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 date::weekday weekday() const NOEXCEPT;
+  CONSTCD11 unsigned index() const NOEXCEPT;
+  CONSTCD11 date::weekday_indexed weekday_indexed() const NOEXCEPT;
+
+  CONSTCD14 operator sys_days() const NOEXCEPT;
+  CONSTCD14 explicit operator local_days() const NOEXCEPT;
+  CONSTCD14 bool ok() const NOEXCEPT;
+
+ private:
+  static CONSTCD14 year_month_weekday from_days(days dp) NOEXCEPT;
+  CONSTCD14 days to_days() const NOEXCEPT;
 };
 
 CONSTCD11
-    bool operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT;
+bool operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT;
 CONSTCD11
-    bool operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT;
+bool operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday
-operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT;
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_weekday operator+(const year_month_weekday& ymwd,
+                                       const months& dm) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday
-operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT;
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_weekday operator+(const months& dm,
+                                       const year_month_weekday& ymwd) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday
-operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT;
+year_month_weekday operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday
-operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT;
+year_month_weekday operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday
-operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT;
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_weekday operator-(const year_month_weekday& ymwd,
+                                       const months& dm) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday
-operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT;
+year_month_weekday operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday& ymwdi);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const year_month_weekday& ymwdi);
 
 // year_month_weekday_last
 
-class year_month_weekday_last
-{
-    date::year y_;
-    date::month m_;
-    date::weekday_last wdl_;
-
-public:
-    CONSTCD11 year_month_weekday_last(const date::year& y, const date::month& m,
-                                      const date::weekday_last& wdl) NOEXCEPT;
-
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month_weekday_last& operator+=(const months& m) NOEXCEPT;
-    template<class = detail::unspecified_month_disambiguator>
-    CONSTCD14 year_month_weekday_last& operator-=(const months& m) NOEXCEPT;
-    CONSTCD14 year_month_weekday_last& operator+=(const years& y) NOEXCEPT;
-    CONSTCD14 year_month_weekday_last& operator-=(const years& y) NOEXCEPT;
-
-    CONSTCD11 date::year year() const NOEXCEPT;
-    CONSTCD11 date::month month() const NOEXCEPT;
-    CONSTCD11 date::weekday weekday() const NOEXCEPT;
-    CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT;
-
-    CONSTCD14 operator sys_days() const NOEXCEPT;
-    CONSTCD14 explicit operator local_days() const NOEXCEPT;
-    CONSTCD11 bool ok() const NOEXCEPT;
-
-private:
-    CONSTCD14 days to_days() const NOEXCEPT;
+class year_month_weekday_last {
+  date::year y_;
+  date::month m_;
+  date::weekday_last wdl_;
+
+ public:
+  CONSTCD11 year_month_weekday_last(const date::year& y, const date::month& m,
+                                    const date::weekday_last& wdl) NOEXCEPT;
+
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month_weekday_last& operator+=(const months& m) NOEXCEPT;
+  template <class = detail::unspecified_month_disambiguator>
+  CONSTCD14 year_month_weekday_last& operator-=(const months& m) NOEXCEPT;
+  CONSTCD14 year_month_weekday_last& operator+=(const years& y) NOEXCEPT;
+  CONSTCD14 year_month_weekday_last& operator-=(const years& y) NOEXCEPT;
+
+  CONSTCD11 date::year year() const NOEXCEPT;
+  CONSTCD11 date::month month() const NOEXCEPT;
+  CONSTCD11 date::weekday weekday() const NOEXCEPT;
+  CONSTCD11 date::weekday_last weekday_last() const NOEXCEPT;
+
+  CONSTCD14 operator sys_days() const NOEXCEPT;
+  CONSTCD14 explicit operator local_days() const NOEXCEPT;
+  CONSTCD11 bool ok() const NOEXCEPT;
+
+ private:
+  CONSTCD14 days to_days() const NOEXCEPT;
 };
 
 CONSTCD11
-bool
-operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT;
+bool operator==(const year_month_weekday_last& x,
+                const year_month_weekday_last& y) NOEXCEPT;
 
 CONSTCD11
-bool
-operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT;
+bool operator!=(const year_month_weekday_last& x,
+                const year_month_weekday_last& y) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday_last
-operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT;
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_weekday_last operator+(const year_month_weekday_last& ymwdl,
+                                            const months& dm) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday_last
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_weekday_last
 operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday_last
-operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT;
+year_month_weekday_last operator+(const year_month_weekday_last& ymwdl,
+                                  const years& dy) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday_last
-operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT;
+year_month_weekday_last operator+(const years& dy,
+                                  const year_month_weekday_last& ymwdl) NOEXCEPT;
 
-template<class = detail::unspecified_month_disambiguator>
-CONSTCD14
-year_month_weekday_last
-operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT;
+template <class = detail::unspecified_month_disambiguator>
+CONSTCD14 year_month_weekday_last operator-(const year_month_weekday_last& ymwdl,
+                                            const months& dm) NOEXCEPT;
 
 CONSTCD11
-year_month_weekday_last
-operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT;
+year_month_weekday_last operator-(const year_month_weekday_last& ymwdl,
+                                  const years& dy) NOEXCEPT;
 
-template<class CharT, class Traits>
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday_last& ymwdl);
+template <class CharT, class Traits>
+std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os,
+                                              const year_month_weekday_last& ymwdl);
 
 #if !defined(_MSC_VER) || (_MSC_VER >= 1900)
-inline namespace literals
-{
+inline namespace literals {
 
-CONSTCD11 date::day  operator "" _d(unsigned long long d) NOEXCEPT;
-CONSTCD11 date::year operator "" _y(unsigned long long y) NOEXCEPT;
+CONSTCD11 date::day operator"" _d(unsigned long long d) NOEXCEPT;
+CONSTCD11 date::year operator"" _y(unsigned long long y) NOEXCEPT;
 
-}  // inline namespace literals
-#endif // !defined(_MSC_VER) || (_MSC_VER >= 1900)
+}  // namespace literals
+#endif  // !defined(_MSC_VER) || (_MSC_VER >= 1900)
 
 // CONSTDATA date::month January{1};
 // CONSTDATA date::month February{2};
@@ -999,16 +957,13 @@ CONSTCD11 date::year operator "" _y(unsigned long long y) NOEXCEPT;
 #if HAS_VOID_T
 
 template <class T, class = std::void_t<>>
-struct is_clock
-    : std::false_type
-{};
+struct is_clock : std::false_type {};
 
 template <class T>
-struct is_clock<T, std::void_t<decltype(T::now()), typename T::rep, typename T::period,
-                               typename T::duration, typename T::time_point,
-                               decltype(T::is_steady)>>
-    : std::true_type
-{};
+struct is_clock<
+    T, std::void_t<decltype(T::now()), typename T::rep, typename T::period,
+                   typename T::duration, typename T::time_point, decltype(T::is_steady)>>
+    : std::true_type {};
 
 #endif  // HAS_VOID_T
 
@@ -1019,886 +974,652 @@ struct is_clock<T, std::void_t<decltype(T::now()), typename T::rep, typename T::
 // utilities
 namespace detail {
 
-template<class CharT, class Traits = std::char_traits<CharT>>
-class save_istream
-{
-protected:
-    std::basic_ios<CharT, Traits>& is_;
-    CharT fill_;
-    std::ios::fmtflags flags_;
-    std::streamsize width_;
-    std::basic_ostream<CharT, Traits>* tie_;
-    std::locale loc_;
-
-public:
-    ~save_istream()
-    {
-        is_.fill(fill_);
-        is_.flags(flags_);
-        is_.width(width_);
-        is_.imbue(loc_);
-        is_.tie(tie_);
-    }
-
-    save_istream(const save_istream&) = delete;
-    save_istream& operator=(const save_istream&) = delete;
-
-    explicit save_istream(std::basic_ios<CharT, Traits>& is)
-        : is_(is)
-        , fill_(is.fill())
-        , flags_(is.flags())
-        , width_(is.width(0))
-        , tie_(is.tie(nullptr))
-        , loc_(is.getloc())
-        {
-            if (tie_ != nullptr)
-                tie_->flush();
-        }
+template <class CharT, class Traits = std::char_traits<CharT>>
+class save_istream {
+ protected:
+  std::basic_ios<CharT, Traits>& is_;
+  CharT fill_;
+  std::ios::fmtflags flags_;
+  std::streamsize width_;
+  std::basic_ostream<CharT, Traits>* tie_;
+  std::locale loc_;
+
+ public:
+  ~save_istream() {
+    is_.fill(fill_);
+    is_.flags(flags_);
+    is_.width(width_);
+    is_.imbue(loc_);
+    is_.tie(tie_);
+  }
+
+  save_istream(const save_istream&) = delete;
+  save_istream& operator=(const save_istream&) = delete;
+
+  explicit save_istream(std::basic_ios<CharT, Traits>& is)
+      : is_(is),
+        fill_(is.fill()),
+        flags_(is.flags()),
+        width_(is.width(0)),
+        tie_(is.tie(nullptr)),
+        loc_(is.getloc()) {
+    if (tie_ != nullptr) tie_->flush();
+  }
 };
 
-template<class CharT, class Traits = std::char_traits<CharT>>
-class save_ostream
-    : private save_istream<CharT, Traits>
-{
-public:
-    ~save_ostream()
-    {
-        if ((this->flags_ & std::ios::unitbuf) &&
+template <class CharT, class Traits = std::char_traits<CharT>>
+class save_ostream : private save_istream<CharT, Traits> {
+ public:
+  ~save_ostream() {
+    if ((this->flags_ & std::ios::unitbuf) &&
 #if HAS_UNCAUGHT_EXCEPTIONS
-                std::uncaught_exceptions() == 0 &&
+        std::uncaught_exceptions() == 0 &&
 #else
-                !std::uncaught_exception() &&
+        !std::uncaught_exception() &&
 #endif
-                this->is_.good())
-            this->is_.rdbuf()->pubsync();
-    }
+        this->is_.good())
+      this->is_.rdbuf()->pubsync();
+  }
 
-    save_ostream(const save_ostream&) = delete;
-    save_ostream& operator=(const save_ostream&) = delete;
+  save_ostream(const save_ostream&) = delete;
+  save_ostream& operator=(const save_ostream&) = delete;
 
-    explicit save_ostream(std::basic_ios<CharT, Traits>& os)
-        : save_istream<CharT, Traits>(os)
-        {
-        }
+  explicit save_ostream(std::basic_ios<CharT, Traits>& os)
+      : save_istream<CharT, Traits>(os) {}
 };
 
 template <class T>
-struct choose_trunc_type
-{
-    static const int digits = std::numeric_limits<T>::digits;
-    using type = typename std::conditional
-                 <
-                     digits < 32,
-                     std::int32_t,
-                     typename std::conditional
-                     <
-                         digits < 64,
-                         std::int64_t,
+struct choose_trunc_type {
+  static const int digits = std::numeric_limits<T>::digits;
+  using type = typename std::conditional < digits < 32, std::int32_t,
+        typename std::conditional<digits<64, std::int64_t,
 #ifdef __SIZEOF_INT128__
-                         __int128
+                                         __int128
 #else
-                         std::int64_t
+                                         std::int64_t
 #endif
-                     >::type
-                 >::type;
+                                         >::type>::type;
 };
 
 template <class T>
-CONSTCD11
-inline
-typename std::enable_if
-<
-    !std::chrono::treat_as_floating_point<T>::value,
-    T
->::type
-trunc(T t) NOEXCEPT
-{
-    return t;
+CONSTCD11 inline
+    typename std::enable_if<!std::chrono::treat_as_floating_point<T>::value, T>::type
+    trunc(T t) NOEXCEPT {
+  return t;
 }
 
 template <class T>
-CONSTCD14
-inline
-typename std::enable_if
-<
-    std::chrono::treat_as_floating_point<T>::value,
-    T
->::type
-trunc(T t) NOEXCEPT
-{
-    using std::numeric_limits;
-    using I = typename choose_trunc_type<T>::type;
-    CONSTDATA auto digits = numeric_limits<T>::digits;
-    static_assert(digits < numeric_limits<I>::digits, "");
-    CONSTDATA auto max = I{1} << (digits-1);
-    CONSTDATA auto min = -max;
-    const auto negative = t < T{0};
-    if (min <= t && t <= max && t != 0 && t == t)
-    {
-        t = static_cast<T>(static_cast<I>(t));
-        if (t == 0 && negative)
-            t = -t;
-    }
-    return t;
+CONSTCD14 inline
+    typename std::enable_if<std::chrono::treat_as_floating_point<T>::value, T>::type
+    trunc(T t) NOEXCEPT {
+  using std::numeric_limits;
+  using I = typename choose_trunc_type<T>::type;
+  CONSTDATA auto digits = numeric_limits<T>::digits;
+  static_assert(digits < numeric_limits<I>::digits, "");
+  CONSTDATA auto max = I{1} << (digits - 1);
+  CONSTDATA auto min = -max;
+  const auto negative = t < T{0};
+  if (min <= t && t <= max && t != 0 && t == t) {
+    t = static_cast<T>(static_cast<I>(t));
+    if (t == 0 && negative) t = -t;
+  }
+  return t;
 }
 
 template <std::intmax_t Xp, std::intmax_t Yp>
-struct static_gcd
-{
-    static const std::intmax_t value = static_gcd<Yp, Xp % Yp>::value;
+struct static_gcd {
+  static const std::intmax_t value = static_gcd<Yp, Xp % Yp>::value;
 };
 
 template <std::intmax_t Xp>
-struct static_gcd<Xp, 0>
-{
-    static const std::intmax_t value = Xp;
+struct static_gcd<Xp, 0> {
+  static const std::intmax_t value = Xp;
 };
 
 template <>
-struct static_gcd<0, 0>
-{
-    static const std::intmax_t value = 1;
+struct static_gcd<0, 0> {
+  static const std::intmax_t value = 1;
 };
 
 template <class R1, class R2>
-struct no_overflow
-{
-private:
-    static const std::intmax_t gcd_n1_n2 = static_gcd<R1::num, R2::num>::value;
-    static const std::intmax_t gcd_d1_d2 = static_gcd<R1::den, R2::den>::value;
-    static const std::intmax_t n1 = R1::num / gcd_n1_n2;
-    static const std::intmax_t d1 = R1::den / gcd_d1_d2;
-    static const std::intmax_t n2 = R2::num / gcd_n1_n2;
-    static const std::intmax_t d2 = R2::den / gcd_d1_d2;
-    static const std::intmax_t max = -((std::intmax_t(1) <<
-                                       (sizeof(std::intmax_t) * CHAR_BIT - 1)) + 1);
-
-    template <std::intmax_t Xp, std::intmax_t Yp, bool overflow>
-    struct mul    // overflow == false
-    {
-        static const std::intmax_t value = Xp * Yp;
-    };
-
-    template <std::intmax_t Xp, std::intmax_t Yp>
-    struct mul<Xp, Yp, true>
-    {
-        static const std::intmax_t value = 1;
-    };
-
-public:
-    static const bool value = (n1 <= max / d2) && (n2 <= max / d1);
-    typedef std::ratio<mul<n1, d2, !value>::value,
-                       mul<n2, d1, !value>::value> type;
+struct no_overflow {
+ private:
+  static const std::intmax_t gcd_n1_n2 = static_gcd<R1::num, R2::num>::value;
+  static const std::intmax_t gcd_d1_d2 = static_gcd<R1::den, R2::den>::value;
+  static const std::intmax_t n1 = R1::num / gcd_n1_n2;
+  static const std::intmax_t d1 = R1::den / gcd_d1_d2;
+  static const std::intmax_t n2 = R2::num / gcd_n1_n2;
+  static const std::intmax_t d2 = R2::den / gcd_d1_d2;
+  static const std::intmax_t max =
+      -((std::intmax_t(1) << (sizeof(std::intmax_t) * CHAR_BIT - 1)) + 1);
+
+  template <std::intmax_t Xp, std::intmax_t Yp, bool overflow>
+  struct mul  // overflow == false
+  {
+    static const std::intmax_t value = Xp * Yp;
+  };
+
+  template <std::intmax_t Xp, std::intmax_t Yp>
+  struct mul<Xp, Yp, true> {
+    static const std::intmax_t value = 1;
+  };
+
+ public:
+  static const bool value = (n1 <= max / d2) && (n2 <= max / d1);
+  typedef std::ratio<mul<n1, d2, !value>::value, mul<n2, d1, !value>::value> type;
 };
 
-}  // detail
+}  // namespace detail
 
 // trunc towards zero
 template <class To, class Rep, class Period>
-CONSTCD11
-inline
-typename std::enable_if
-<
-    detail::no_overflow<Period, typename To::period>::value,
-    To
->::type
-trunc(const std::chrono::duration<Rep, Period>& d)
-{
-    return To{detail::trunc(std::chrono::duration_cast<To>(d).count())};
+CONSTCD11 inline
+    typename std::enable_if<detail::no_overflow<Period, typename To::period>::value,
+                            To>::type
+    trunc(const std::chrono::duration<Rep, Period>& d) {
+  return To{detail::trunc(std::chrono::duration_cast<To>(d).count())};
 }
 
 template <class To, class Rep, class Period>
-CONSTCD11
-inline
-typename std::enable_if
-<
-    !detail::no_overflow<Period, typename To::period>::value,
-    To
->::type
-trunc(const std::chrono::duration<Rep, Period>& d)
-{
-    using std::chrono::duration_cast;
-    using std::chrono::duration;
-    using rep = typename std::common_type<Rep, typename To::rep>::type;
-    return To{detail::trunc(duration_cast<To>(duration_cast<duration<rep>>(d)).count())};
+CONSTCD11 inline
+    typename std::enable_if<!detail::no_overflow<Period, typename To::period>::value,
+                            To>::type
+    trunc(const std::chrono::duration<Rep, Period>& d) {
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using rep = typename std::common_type<Rep, typename To::rep>::type;
+  return To{detail::trunc(duration_cast<To>(duration_cast<duration<rep>>(d)).count())};
 }
 
 #ifndef HAS_CHRONO_ROUNDING
-#  if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190023918 || (_MSC_FULL_VER >= 190000000 && defined (__clang__)))
-#    define HAS_CHRONO_ROUNDING 1
-#  elif defined(__cpp_lib_chrono) && __cplusplus > 201402 && __cpp_lib_chrono >= 201510
-#    define HAS_CHRONO_ROUNDING 1
-#  elif defined(_LIBCPP_VERSION) && __cplusplus > 201402 && _LIBCPP_VERSION >= 3800
-#    define HAS_CHRONO_ROUNDING 1
-#  else
-#    define HAS_CHRONO_ROUNDING 0
-#  endif
+#if defined(_MSC_FULL_VER) && \
+    (_MSC_FULL_VER >= 190023918 || (_MSC_FULL_VER >= 190000000 && defined(__clang__)))
+#define HAS_CHRONO_ROUNDING 1
+#elif defined(__cpp_lib_chrono) && __cplusplus > 201402 && __cpp_lib_chrono >= 201510
+#define HAS_CHRONO_ROUNDING 1
+#elif defined(_LIBCPP_VERSION) && __cplusplus > 201402 && _LIBCPP_VERSION >= 3800
+#define HAS_CHRONO_ROUNDING 1
+#else
+#define HAS_CHRONO_ROUNDING 0
+#endif
 #endif  // HAS_CHRONO_ROUNDING
 
 #if HAS_CHRONO_ROUNDING == 0
 
 // round down
 template <class To, class Rep, class Period>
-CONSTCD14
-inline
-typename std::enable_if
-<
-    detail::no_overflow<Period, typename To::period>::value,
-    To
->::type
-floor(const std::chrono::duration<Rep, Period>& d)
-{
-    auto t = trunc<To>(d);
-    if (t > d)
-        return t - To{1};
-    return t;
+CONSTCD14 inline
+    typename std::enable_if<detail::no_overflow<Period, typename To::period>::value,
+                            To>::type
+    floor(const std::chrono::duration<Rep, Period>& d) {
+  auto t = trunc<To>(d);
+  if (t > d) return t - To{1};
+  return t;
 }
 
 template <class To, class Rep, class Period>
-CONSTCD14
-inline
-typename std::enable_if
-<
-    !detail::no_overflow<Period, typename To::period>::value,
-    To
->::type
-floor(const std::chrono::duration<Rep, Period>& d)
-{
-    using rep = typename std::common_type<Rep, typename To::rep>::type;
-    return floor<To>(floor<std::chrono::duration<rep>>(d));
+CONSTCD14 inline
+    typename std::enable_if<!detail::no_overflow<Period, typename To::period>::value,
+                            To>::type
+    floor(const std::chrono::duration<Rep, Period>& d) {
+  using rep = typename std::common_type<Rep, typename To::rep>::type;
+  return floor<To>(floor<std::chrono::duration<rep>>(d));
 }
 
 // round to nearest, to even on tie
 template <class To, class Rep, class Period>
-CONSTCD14
-inline
-To
-round(const std::chrono::duration<Rep, Period>& d)
-{
-    auto t0 = floor<To>(d);
-    auto t1 = t0 + To{1};
-    if (t1 == To{0} && t0 < To{0})
-        t1 = -t1;
-    auto diff0 = d - t0;
-    auto diff1 = t1 - d;
-    if (diff0 == diff1)
-    {
-        if (t0 - trunc<To>(t0/2)*2 == To{0})
-            return t0;
-        return t1;
-    }
-    if (diff0 < diff1)
-        return t0;
+CONSTCD14 inline To round(const std::chrono::duration<Rep, Period>& d) {
+  auto t0 = floor<To>(d);
+  auto t1 = t0 + To{1};
+  if (t1 == To{0} && t0 < To{0}) t1 = -t1;
+  auto diff0 = d - t0;
+  auto diff1 = t1 - d;
+  if (diff0 == diff1) {
+    if (t0 - trunc<To>(t0 / 2) * 2 == To{0}) return t0;
     return t1;
+  }
+  if (diff0 < diff1) return t0;
+  return t1;
 }
 
 // round up
 template <class To, class Rep, class Period>
-CONSTCD14
-inline
-To
-ceil(const std::chrono::duration<Rep, Period>& d)
-{
-    auto t = trunc<To>(d);
-    if (t < d)
-        return t + To{1};
-    return t;
+CONSTCD14 inline To ceil(const std::chrono::duration<Rep, Period>& d) {
+  auto t = trunc<To>(d);
+  if (t < d) return t + To{1};
+  return t;
 }
 
 template <class Rep, class Period,
-          class = typename std::enable_if
-          <
-              std::numeric_limits<Rep>::is_signed
-          >::type>
-CONSTCD11
-std::chrono::duration<Rep, Period>
-abs(std::chrono::duration<Rep, Period> d)
-{
-    return d >= d.zero() ? d : -d;
+          class = typename std::enable_if<std::numeric_limits<Rep>::is_signed>::type>
+CONSTCD11 std::chrono::duration<Rep, Period> abs(std::chrono::duration<Rep, Period> d) {
+  return d >= d.zero() ? d : -d;
 }
 
 // round down
 template <class To, class Clock, class FromDuration>
-CONSTCD11
-inline
-std::chrono::time_point<Clock, To>
-floor(const std::chrono::time_point<Clock, FromDuration>& tp)
-{
-    using std::chrono::time_point;
-    return time_point<Clock, To>{date::floor<To>(tp.time_since_epoch())};
+CONSTCD11 inline std::chrono::time_point<Clock, To> floor(
+    const std::chrono::time_point<Clock, FromDuration>& tp) {
+  using std::chrono::time_point;
+  return time_point<Clock, To>{date::floor<To>(tp.time_since_epoch())};
 }
 
 // round to nearest, to even on tie
 template <class To, class Clock, class FromDuration>
-CONSTCD11
-inline
-std::chrono::time_point<Clock, To>
-round(const std::chrono::time_point<Clock, FromDuration>& tp)
-{
-    using std::chrono::time_point;
-    return time_point<Clock, To>{round<To>(tp.time_since_epoch())};
+CONSTCD11 inline std::chrono::time_point<Clock, To> round(
+    const std::chrono::time_point<Clock, FromDuration>& tp) {
+  using std::chrono::time_point;
+  return time_point<Clock, To>{round<To>(tp.time_since_epoch())};
 }
 
 // round up
 template <class To, class Clock, class FromDuration>
-CONSTCD11
-inline
-std::chrono::time_point<Clock, To>
-ceil(const std::chrono::time_point<Clock, FromDuration>& tp)
-{
-    using std::chrono::time_point;
-    return time_point<Clock, To>{ceil<To>(tp.time_since_epoch())};
+CONSTCD11 inline std::chrono::time_point<Clock, To> ceil(
+    const std::chrono::time_point<Clock, FromDuration>& tp) {
+  using std::chrono::time_point;
+  return time_point<Clock, To>{ceil<To>(tp.time_since_epoch())};
 }
 
 #else  // HAS_CHRONO_ROUNDING == 1
 
-using std::chrono::floor;
+using std::chrono::abs;
 using std::chrono::ceil;
+using std::chrono::floor;
 using std::chrono::round;
-using std::chrono::abs;
 
 #endif  // HAS_CHRONO_ROUNDING
 
 // trunc towards zero
 template <class To, class Clock, class FromDuration>
-CONSTCD11
-inline
-std::chrono::time_point<Clock, To>
-trunc(const std::chrono::time_point<Clock, FromDuration>& tp)
-{
-    using std::chrono::time_point;
-    return time_point<Clock, To>{trunc<To>(tp.time_since_epoch())};
+CONSTCD11 inline std::chrono::time_point<Clock, To> trunc(
+    const std::chrono::time_point<Clock, FromDuration>& tp) {
+  using std::chrono::time_point;
+  return time_point<Clock, To>{trunc<To>(tp.time_since_epoch())};
 }
 
 // day
 
 CONSTCD11 inline day::day(unsigned d) NOEXCEPT : d_(static_cast<unsigned char>(d)) {}
-CONSTCD14 inline day& day::operator++() NOEXCEPT {++d_; return *this;}
-CONSTCD14 inline day day::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
-CONSTCD14 inline day& day::operator--() NOEXCEPT {--d_; return *this;}
-CONSTCD14 inline day day::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
-CONSTCD14 inline day& day::operator+=(const days& d) NOEXCEPT {*this = *this + d; return *this;}
-CONSTCD14 inline day& day::operator-=(const days& d) NOEXCEPT {*this = *this - d; return *this;}
-CONSTCD11 inline day::operator unsigned() const NOEXCEPT {return d_;}
-CONSTCD11 inline bool day::ok() const NOEXCEPT {return 1 <= d_ && d_ <= 31;}
+CONSTCD14 inline day& day::operator++() NOEXCEPT {
+  ++d_;
+  return *this;
+}
+CONSTCD14 inline day day::operator++(int) NOEXCEPT {
+  auto tmp(*this);
+  ++(*this);
+  return tmp;
+}
+CONSTCD14 inline day& day::operator--() NOEXCEPT {
+  --d_;
+  return *this;
+}
+CONSTCD14 inline day day::operator--(int) NOEXCEPT {
+  auto tmp(*this);
+  --(*this);
+  return tmp;
+}
+CONSTCD14 inline day& day::operator+=(const days& d) NOEXCEPT {
+  *this = *this + d;
+  return *this;
+}
+CONSTCD14 inline day& day::operator-=(const days& d) NOEXCEPT {
+  *this = *this - d;
+  return *this;
+}
+CONSTCD11 inline day::operator unsigned() const NOEXCEPT { return d_; }
+CONSTCD11 inline bool day::ok() const NOEXCEPT { return 1 <= d_ && d_ <= 31; }
 
 CONSTCD11
-inline
-bool
-operator==(const day& x, const day& y) NOEXCEPT
-{
-    return static_cast<unsigned>(x) == static_cast<unsigned>(y);
+inline bool operator==(const day& x, const day& y) NOEXCEPT {
+  return static_cast<unsigned>(x) == static_cast<unsigned>(y);
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const day& x, const day& y) NOEXCEPT
-{
-    return !(x == y);
-}
+inline bool operator!=(const day& x, const day& y) NOEXCEPT { return !(x == y); }
 
 CONSTCD11
-inline
-bool
-operator<(const day& x, const day& y) NOEXCEPT
-{
-    return static_cast<unsigned>(x) < static_cast<unsigned>(y);
+inline bool operator<(const day& x, const day& y) NOEXCEPT {
+  return static_cast<unsigned>(x) < static_cast<unsigned>(y);
 }
 
 CONSTCD11
-inline
-bool
-operator>(const day& x, const day& y) NOEXCEPT
-{
-    return y < x;
-}
+inline bool operator>(const day& x, const day& y) NOEXCEPT { return y < x; }
 
 CONSTCD11
-inline
-bool
-operator<=(const day& x, const day& y) NOEXCEPT
-{
-    return !(y < x);
-}
+inline bool operator<=(const day& x, const day& y) NOEXCEPT { return !(y < x); }
 
 CONSTCD11
-inline
-bool
-operator>=(const day& x, const day& y) NOEXCEPT
-{
-    return !(x < y);
-}
+inline bool operator>=(const day& x, const day& y) NOEXCEPT { return !(x < y); }
 
 CONSTCD11
-inline
-days
-operator-(const day& x, const day& y) NOEXCEPT
-{
-    return days{static_cast<days::rep>(static_cast<unsigned>(x)
-                                     - static_cast<unsigned>(y))};
+inline days operator-(const day& x, const day& y) NOEXCEPT {
+  return days{
+      static_cast<days::rep>(static_cast<unsigned>(x) - static_cast<unsigned>(y))};
 }
 
 CONSTCD11
-inline
-day
-operator+(const day& x, const days& y) NOEXCEPT
-{
-    return day{static_cast<unsigned>(x) + static_cast<unsigned>(y.count())};
+inline day operator+(const day& x, const days& y) NOEXCEPT {
+  return day{static_cast<unsigned>(x) + static_cast<unsigned>(y.count())};
 }
 
 CONSTCD11
-inline
-day
-operator+(const days& x, const day& y) NOEXCEPT
-{
-    return y + x;
-}
+inline day operator+(const days& x, const day& y) NOEXCEPT { return y + x; }
 
 CONSTCD11
-inline
-day
-operator-(const day& x, const days& y) NOEXCEPT
-{
-    return x + -y;
-}
+inline day operator-(const day& x, const days& y) NOEXCEPT { return x + -y; }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const day& d)
-{
-    detail::save_ostream<CharT, Traits> _(os);
-    os.fill('0');
-    os.flags(std::ios::dec | std::ios::right);
-    os.width(2);
-    os << static_cast<unsigned>(d);
-    if (!d.ok())
-        os << " is not a valid day";
-    return os;
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const day& d) {
+  detail::save_ostream<CharT, Traits> _(os);
+  os.fill('0');
+  os.flags(std::ios::dec | std::ios::right);
+  os.width(2);
+  os << static_cast<unsigned>(d);
+  if (!d.ok()) os << " is not a valid day";
+  return os;
 }
 
 // month
 
 CONSTCD11 inline month::month(unsigned m) NOEXCEPT : m_(static_cast<decltype(m_)>(m)) {}
-CONSTCD14 inline month& month::operator++() NOEXCEPT {*this += months{1}; return *this;}
-CONSTCD14 inline month month::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
-CONSTCD14 inline month& month::operator--() NOEXCEPT {*this -= months{1}; return *this;}
-CONSTCD14 inline month month::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
+CONSTCD14 inline month& month::operator++() NOEXCEPT {
+  *this += months{1};
+  return *this;
+}
+CONSTCD14 inline month month::operator++(int) NOEXCEPT {
+  auto tmp(*this);
+  ++(*this);
+  return tmp;
+}
+CONSTCD14 inline month& month::operator--() NOEXCEPT {
+  *this -= months{1};
+  return *this;
+}
+CONSTCD14 inline month month::operator--(int) NOEXCEPT {
+  auto tmp(*this);
+  --(*this);
+  return tmp;
+}
 
 CONSTCD14
-inline
-month&
-month::operator+=(const months& m) NOEXCEPT
-{
-    *this = *this + m;
-    return *this;
+inline month& month::operator+=(const months& m) NOEXCEPT {
+  *this = *this + m;
+  return *this;
 }
 
 CONSTCD14
-inline
-month&
-month::operator-=(const months& m) NOEXCEPT
-{
-    *this = *this - m;
-    return *this;
+inline month& month::operator-=(const months& m) NOEXCEPT {
+  *this = *this - m;
+  return *this;
 }
 
-CONSTCD11 inline month::operator unsigned() const NOEXCEPT {return m_;}
-CONSTCD11 inline bool month::ok() const NOEXCEPT {return 1 <= m_ && m_ <= 12;}
+CONSTCD11 inline month::operator unsigned() const NOEXCEPT { return m_; }
+CONSTCD11 inline bool month::ok() const NOEXCEPT { return 1 <= m_ && m_ <= 12; }
 
 CONSTCD11
-inline
-bool
-operator==(const month& x, const month& y) NOEXCEPT
-{
-    return static_cast<unsigned>(x) == static_cast<unsigned>(y);
+inline bool operator==(const month& x, const month& y) NOEXCEPT {
+  return static_cast<unsigned>(x) == static_cast<unsigned>(y);
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const month& x, const month& y) NOEXCEPT
-{
-    return !(x == y);
-}
+inline bool operator!=(const month& x, const month& y) NOEXCEPT { return !(x == y); }
 
 CONSTCD11
-inline
-bool
-operator<(const month& x, const month& y) NOEXCEPT
-{
-    return static_cast<unsigned>(x) < static_cast<unsigned>(y);
+inline bool operator<(const month& x, const month& y) NOEXCEPT {
+  return static_cast<unsigned>(x) < static_cast<unsigned>(y);
 }
 
 CONSTCD11
-inline
-bool
-operator>(const month& x, const month& y) NOEXCEPT
-{
-    return y < x;
-}
+inline bool operator>(const month& x, const month& y) NOEXCEPT { return y < x; }
 
 CONSTCD11
-inline
-bool
-operator<=(const month& x, const month& y) NOEXCEPT
-{
-    return !(y < x);
-}
+inline bool operator<=(const month& x, const month& y) NOEXCEPT { return !(y < x); }
 
 CONSTCD11
-inline
-bool
-operator>=(const month& x, const month& y) NOEXCEPT
-{
-    return !(x < y);
-}
+inline bool operator>=(const month& x, const month& y) NOEXCEPT { return !(x < y); }
 
 CONSTCD14
-inline
-months
-operator-(const month& x, const month& y) NOEXCEPT
-{
-    auto const d = static_cast<unsigned>(x) - static_cast<unsigned>(y);
-    return months(d <= 11 ? d : d + 12);
+inline months operator-(const month& x, const month& y) NOEXCEPT {
+  auto const d = static_cast<unsigned>(x) - static_cast<unsigned>(y);
+  return months(d <= 11 ? d : d + 12);
 }
 
 CONSTCD14
-inline
-month
-operator+(const month& x, const months& y) NOEXCEPT
-{
-    auto const mu = static_cast<long long>(static_cast<unsigned>(x)) + (y.count() - 1);
-    auto const yr = (mu >= 0 ? mu : mu-11) / 12;
-    return month{static_cast<unsigned>(mu - yr * 12 + 1)};
+inline month operator+(const month& x, const months& y) NOEXCEPT {
+  auto const mu = static_cast<long long>(static_cast<unsigned>(x)) + (y.count() - 1);
+  auto const yr = (mu >= 0 ? mu : mu - 11) / 12;
+  return month{static_cast<unsigned>(mu - yr * 12 + 1)};
 }
 
 CONSTCD14
-inline
-month
-operator+(const months& x, const month& y) NOEXCEPT
-{
-    return y + x;
-}
+inline month operator+(const months& x, const month& y) NOEXCEPT { return y + x; }
 
 CONSTCD14
-inline
-month
-operator-(const month& x, const months& y) NOEXCEPT
-{
-    return x + -y;
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month& m)
-{
-    if (m.ok())
-    {
-        CharT fmt[] = {'%', 'b', 0};
-        os << format(os.getloc(), fmt, m);
-    }
-    else
-        os << static_cast<unsigned>(m) << " is not a valid month";
-    return os;
+inline month operator-(const month& x, const months& y) NOEXCEPT { return x + -y; }
+
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const month& m) {
+  if (m.ok()) {
+    CharT fmt[] = {'%', 'b', 0};
+    os << format(os.getloc(), fmt, m);
+  } else
+    os << static_cast<unsigned>(m) << " is not a valid month";
+  return os;
 }
 
 // year
 
 CONSTCD11 inline year::year(int y) NOEXCEPT : y_(static_cast<decltype(y_)>(y)) {}
-CONSTCD14 inline year& year::operator++() NOEXCEPT {++y_; return *this;}
-CONSTCD14 inline year year::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
-CONSTCD14 inline year& year::operator--() NOEXCEPT {--y_; return *this;}
-CONSTCD14 inline year year::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
-CONSTCD14 inline year& year::operator+=(const years& y) NOEXCEPT {*this = *this + y; return *this;}
-CONSTCD14 inline year& year::operator-=(const years& y) NOEXCEPT {*this = *this - y; return *this;}
-CONSTCD11 inline year year::operator-() const NOEXCEPT {return year{-y_};}
-CONSTCD11 inline year year::operator+() const NOEXCEPT {return *this;}
+CONSTCD14 inline year& year::operator++() NOEXCEPT {
+  ++y_;
+  return *this;
+}
+CONSTCD14 inline year year::operator++(int) NOEXCEPT {
+  auto tmp(*this);
+  ++(*this);
+  return tmp;
+}
+CONSTCD14 inline year& year::operator--() NOEXCEPT {
+  --y_;
+  return *this;
+}
+CONSTCD14 inline year year::operator--(int) NOEXCEPT {
+  auto tmp(*this);
+  --(*this);
+  return tmp;
+}
+CONSTCD14 inline year& year::operator+=(const years& y) NOEXCEPT {
+  *this = *this + y;
+  return *this;
+}
+CONSTCD14 inline year& year::operator-=(const years& y) NOEXCEPT {
+  *this = *this - y;
+  return *this;
+}
+CONSTCD11 inline year year::operator-() const NOEXCEPT { return year{-y_}; }
+CONSTCD11 inline year year::operator+() const NOEXCEPT { return *this; }
 
 CONSTCD11
-inline
-bool
-year::is_leap() const NOEXCEPT
-{
-    return y_ % 4 == 0 && (y_ % 100 != 0 || y_ % 400 == 0);
+inline bool year::is_leap() const NOEXCEPT {
+  return y_ % 4 == 0 && (y_ % 100 != 0 || y_ % 400 == 0);
 }
 
-CONSTCD11 inline year::operator int() const NOEXCEPT {return y_;}
+CONSTCD11 inline year::operator int() const NOEXCEPT { return y_; }
 
 CONSTCD11
-inline
-bool
-year::ok() const NOEXCEPT
-{
-    return y_ != std::numeric_limits<short>::min();
-}
+inline bool year::ok() const NOEXCEPT { return y_ != std::numeric_limits<short>::min(); }
 
 CONSTCD11
-inline
-year
-year::min() NOEXCEPT
-{
-    return year{-32767};
-}
+inline year year::min() NOEXCEPT { return year{-32767}; }
 
 CONSTCD11
-inline
-year
-year::max() NOEXCEPT
-{
-    return year{32767};
-}
+inline year year::max() NOEXCEPT { return year{32767}; }
 
 CONSTCD11
-inline
-bool
-operator==(const year& x, const year& y) NOEXCEPT
-{
-    return static_cast<int>(x) == static_cast<int>(y);
+inline bool operator==(const year& x, const year& y) NOEXCEPT {
+  return static_cast<int>(x) == static_cast<int>(y);
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const year& x, const year& y) NOEXCEPT
-{
-    return !(x == y);
-}
+inline bool operator!=(const year& x, const year& y) NOEXCEPT { return !(x == y); }
 
 CONSTCD11
-inline
-bool
-operator<(const year& x, const year& y) NOEXCEPT
-{
-    return static_cast<int>(x) < static_cast<int>(y);
+inline bool operator<(const year& x, const year& y) NOEXCEPT {
+  return static_cast<int>(x) < static_cast<int>(y);
 }
 
 CONSTCD11
-inline
-bool
-operator>(const year& x, const year& y) NOEXCEPT
-{
-    return y < x;
-}
+inline bool operator>(const year& x, const year& y) NOEXCEPT { return y < x; }
 
 CONSTCD11
-inline
-bool
-operator<=(const year& x, const year& y) NOEXCEPT
-{
-    return !(y < x);
-}
+inline bool operator<=(const year& x, const year& y) NOEXCEPT { return !(y < x); }
 
 CONSTCD11
-inline
-bool
-operator>=(const year& x, const year& y) NOEXCEPT
-{
-    return !(x < y);
-}
+inline bool operator>=(const year& x, const year& y) NOEXCEPT { return !(x < y); }
 
 CONSTCD11
-inline
-years
-operator-(const year& x, const year& y) NOEXCEPT
-{
-    return years{static_cast<int>(x) - static_cast<int>(y)};
+inline years operator-(const year& x, const year& y) NOEXCEPT {
+  return years{static_cast<int>(x) - static_cast<int>(y)};
 }
 
 CONSTCD11
-inline
-year
-operator+(const year& x, const years& y) NOEXCEPT
-{
-    return year{static_cast<int>(x) + y.count()};
+inline year operator+(const year& x, const years& y) NOEXCEPT {
+  return year{static_cast<int>(x) + y.count()};
 }
 
 CONSTCD11
-inline
-year
-operator+(const years& x, const year& y) NOEXCEPT
-{
-    return y + x;
-}
+inline year operator+(const years& x, const year& y) NOEXCEPT { return y + x; }
 
 CONSTCD11
-inline
-year
-operator-(const year& x, const years& y) NOEXCEPT
-{
-    return year{static_cast<int>(x) - y.count()};
+inline year operator-(const year& x, const years& y) NOEXCEPT {
+  return year{static_cast<int>(x) - y.count()};
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year& y)
-{
-    detail::save_ostream<CharT, Traits> _(os);
-    os.fill('0');
-    os.flags(std::ios::dec | std::ios::internal);
-    os.width(4 + (y < year{0}));
-    os << static_cast<int>(y);
-    if (!y.ok())
-        os << " is not a valid year";
-    return os;
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const year& y) {
+  detail::save_ostream<CharT, Traits> _(os);
+  os.fill('0');
+  os.flags(std::ios::dec | std::ios::internal);
+  os.width(4 + (y < year{0}));
+  os << static_cast<int>(y);
+  if (!y.ok()) os << " is not a valid year";
+  return os;
 }
 
 // weekday
 
 CONSTCD14
-inline
-unsigned char
-weekday::weekday_from_days(int z) NOEXCEPT
-{
-    auto u = static_cast<unsigned>(z);
-    return static_cast<unsigned char>(z >= -4 ? (u+4) % 7 : u % 7);
+inline unsigned char weekday::weekday_from_days(int z) NOEXCEPT {
+  auto u = static_cast<unsigned>(z);
+  return static_cast<unsigned char>(z >= -4 ? (u + 4) % 7 : u % 7);
 }
 
 CONSTCD11
-inline
-weekday::weekday(unsigned wd) NOEXCEPT
-    : wd_(static_cast<decltype(wd_)>(wd != 7 ? wd : 0))
-    {}
+inline weekday::weekday(unsigned wd) NOEXCEPT
+    : wd_(static_cast<decltype(wd_)>(wd != 7 ? wd : 0)) {}
 
 CONSTCD14
-inline
-weekday::weekday(const sys_days& dp) NOEXCEPT
-    : wd_(weekday_from_days(dp.time_since_epoch().count()))
-    {}
+inline weekday::weekday(const sys_days& dp) NOEXCEPT
+    : wd_(weekday_from_days(dp.time_since_epoch().count())) {}
 
 CONSTCD14
-inline
-weekday::weekday(const local_days& dp) NOEXCEPT
-    : wd_(weekday_from_days(dp.time_since_epoch().count()))
-    {}
+inline weekday::weekday(const local_days& dp) NOEXCEPT
+    : wd_(weekday_from_days(dp.time_since_epoch().count())) {}
 
-CONSTCD14 inline weekday& weekday::operator++() NOEXCEPT {*this += days{1}; return *this;}
-CONSTCD14 inline weekday weekday::operator++(int) NOEXCEPT {auto tmp(*this); ++(*this); return tmp;}
-CONSTCD14 inline weekday& weekday::operator--() NOEXCEPT {*this -= days{1}; return *this;}
-CONSTCD14 inline weekday weekday::operator--(int) NOEXCEPT {auto tmp(*this); --(*this); return tmp;}
+CONSTCD14 inline weekday& weekday::operator++() NOEXCEPT {
+  *this += days{1};
+  return *this;
+}
+CONSTCD14 inline weekday weekday::operator++(int) NOEXCEPT {
+  auto tmp(*this);
+  ++(*this);
+  return tmp;
+}
+CONSTCD14 inline weekday& weekday::operator--() NOEXCEPT {
+  *this -= days{1};
+  return *this;
+}
+CONSTCD14 inline weekday weekday::operator--(int) NOEXCEPT {
+  auto tmp(*this);
+  --(*this);
+  return tmp;
+}
 
 CONSTCD14
-inline
-weekday&
-weekday::operator+=(const days& d) NOEXCEPT
-{
-    *this = *this + d;
-    return *this;
+inline weekday& weekday::operator+=(const days& d) NOEXCEPT {
+  *this = *this + d;
+  return *this;
 }
 
 CONSTCD14
-inline
-weekday&
-weekday::operator-=(const days& d) NOEXCEPT
-{
-    *this = *this - d;
-    return *this;
+inline weekday& weekday::operator-=(const days& d) NOEXCEPT {
+  *this = *this - d;
+  return *this;
 }
 
-CONSTCD11 inline bool weekday::ok() const NOEXCEPT {return wd_ <= 6;}
+CONSTCD11 inline bool weekday::ok() const NOEXCEPT { return wd_ <= 6; }
 
 CONSTCD11
-inline
-unsigned weekday::c_encoding() const NOEXCEPT
-{
-    return unsigned{wd_};
-}
+inline unsigned weekday::c_encoding() const NOEXCEPT { return unsigned{wd_}; }
 
 CONSTCD11
-inline
-unsigned weekday::iso_encoding() const NOEXCEPT
-{
-    return unsigned{((wd_ == 0u) ? 7u : wd_)};
+inline unsigned weekday::iso_encoding() const NOEXCEPT {
+  return unsigned{((wd_ == 0u) ? 7u : wd_)};
 }
 
 CONSTCD11
-inline
-bool
-operator==(const weekday& x, const weekday& y) NOEXCEPT
-{
-    return x.wd_ == y.wd_;
+inline bool operator==(const weekday& x, const weekday& y) NOEXCEPT {
+  return x.wd_ == y.wd_;
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const weekday& x, const weekday& y) NOEXCEPT
-{
-    return !(x == y);
-}
+inline bool operator!=(const weekday& x, const weekday& y) NOEXCEPT { return !(x == y); }
 
 CONSTCD14
-inline
-days
-operator-(const weekday& x, const weekday& y) NOEXCEPT
-{
-    auto const wdu = x.wd_ - y.wd_;
-    auto const wk = (wdu >= 0 ? wdu : wdu-6) / 7;
-    return days{wdu - wk * 7};
+inline days operator-(const weekday& x, const weekday& y) NOEXCEPT {
+  auto const wdu = x.wd_ - y.wd_;
+  auto const wk = (wdu >= 0 ? wdu : wdu - 6) / 7;
+  return days{wdu - wk * 7};
 }
 
 CONSTCD14
-inline
-weekday
-operator+(const weekday& x, const days& y) NOEXCEPT
-{
-    auto const wdu = static_cast<long long>(static_cast<unsigned>(x.wd_)) + y.count();
-    auto const wk = (wdu >= 0 ? wdu : wdu-6) / 7;
-    return weekday{static_cast<unsigned>(wdu - wk * 7)};
+inline weekday operator+(const weekday& x, const days& y) NOEXCEPT {
+  auto const wdu = static_cast<long long>(static_cast<unsigned>(x.wd_)) + y.count();
+  auto const wk = (wdu >= 0 ? wdu : wdu - 6) / 7;
+  return weekday{static_cast<unsigned>(wdu - wk * 7)};
 }
 
 CONSTCD14
-inline
-weekday
-operator+(const days& x, const weekday& y) NOEXCEPT
-{
-    return y + x;
-}
+inline weekday operator+(const days& x, const weekday& y) NOEXCEPT { return y + x; }
 
 CONSTCD14
-inline
-weekday
-operator-(const weekday& x, const days& y) NOEXCEPT
-{
-    return x + -y;
-}
-
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday& wd)
-{
-    if (wd.ok())
-    {
-        CharT fmt[] = {'%', 'a', 0};
-        os << format(fmt, wd);
-    }
-    else
-        os << static_cast<unsigned>(wd.wd_) << " is not a valid weekday";
-    return os;
+inline weekday operator-(const weekday& x, const days& y) NOEXCEPT { return x + -y; }
+
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const weekday& wd) {
+  if (wd.ok()) {
+    CharT fmt[] = {'%', 'a', 0};
+    os << format(fmt, wd);
+  } else
+    os << static_cast<unsigned>(wd.wd_) << " is not a valid weekday";
+  return os;
 }
 
 #if !defined(_MSC_VER) || (_MSC_VER >= 1900)
-inline namespace literals
-{
+inline namespace literals {
 
 CONSTCD11
-inline
-date::day
-operator "" _d(unsigned long long d) NOEXCEPT
-{
-    return date::day{static_cast<unsigned>(d)};
+inline date::day operator"" _d(unsigned long long d) NOEXCEPT {
+  return date::day{static_cast<unsigned>(d)};
 }
 
 CONSTCD11
-inline
-date::year
-operator "" _y(unsigned long long y) NOEXCEPT
-{
-    return date::year(static_cast<int>(y));
+inline date::year operator"" _y(unsigned long long y) NOEXCEPT {
+  return date::year(static_cast<int>(y));
 }
 #endif  // !defined(_MSC_VER) || (_MSC_VER >= 1900)
 
@@ -1953,1743 +1674,1183 @@ CONSTDATA date::weekday Sunday{7};
 // weekday_indexed
 
 CONSTCD11
-inline
-weekday
-weekday_indexed::weekday() const NOEXCEPT
-{
-    return date::weekday{static_cast<unsigned>(wd_)};
+inline weekday weekday_indexed::weekday() const NOEXCEPT {
+  return date::weekday{static_cast<unsigned>(wd_)};
 }
 
-CONSTCD11 inline unsigned weekday_indexed::index() const NOEXCEPT {return index_;}
+CONSTCD11 inline unsigned weekday_indexed::index() const NOEXCEPT { return index_; }
 
 CONSTCD11
-inline
-bool
-weekday_indexed::ok() const NOEXCEPT
-{
-    return weekday().ok() && 1 <= index_ && index_ <= 5;
+inline bool weekday_indexed::ok() const NOEXCEPT {
+  return weekday().ok() && 1 <= index_ && index_ <= 5;
 }
 
 #ifdef __GNUC__
-#  pragma GCC diagnostic push
-#  pragma GCC diagnostic ignored "-Wconversion"
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wconversion"
 #endif  // __GNUC__
 
 CONSTCD11
-inline
-weekday_indexed::weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT
-    : wd_(static_cast<decltype(wd_)>(static_cast<unsigned>(wd.wd_)))
-    , index_(static_cast<decltype(index_)>(index))
-    {}
+inline weekday_indexed::weekday_indexed(const date::weekday& wd, unsigned index) NOEXCEPT
+    : wd_(static_cast<decltype(wd_)>(static_cast<unsigned>(wd.wd_))),
+      index_(static_cast<decltype(index_)>(index)) {}
 
 #ifdef __GNUC__
-#  pragma GCC diagnostic pop
+#pragma GCC diagnostic pop
 #endif  // __GNUC__
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_indexed& wdi)
-{
-    os << wdi.weekday() << '[' << wdi.index();
-    if (!(1 <= wdi.index() && wdi.index() <= 5))
-        os << " is not a valid index";
-    os << ']';
-    return os;
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const weekday_indexed& wdi) {
+  os << wdi.weekday() << '[' << wdi.index();
+  if (!(1 <= wdi.index() && wdi.index() <= 5)) os << " is not a valid index";
+  os << ']';
+  return os;
 }
 
 CONSTCD11
-inline
-weekday_indexed
-weekday::operator[](unsigned index) const NOEXCEPT
-{
-    return {*this, index};
+inline weekday_indexed weekday::operator[](unsigned index) const NOEXCEPT {
+  return {*this, index};
 }
 
 CONSTCD11
-inline
-bool
-operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT
-{
-    return x.weekday() == y.weekday() && x.index() == y.index();
+inline bool operator==(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT {
+  return x.weekday() == y.weekday() && x.index() == y.index();
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const weekday_indexed& x, const weekday_indexed& y) NOEXCEPT {
+  return !(x == y);
 }
 
 // weekday_last
 
-CONSTCD11 inline date::weekday weekday_last::weekday() const NOEXCEPT {return wd_;}
-CONSTCD11 inline bool weekday_last::ok() const NOEXCEPT {return wd_.ok();}
+CONSTCD11 inline date::weekday weekday_last::weekday() const NOEXCEPT { return wd_; }
+CONSTCD11 inline bool weekday_last::ok() const NOEXCEPT { return wd_.ok(); }
 CONSTCD11 inline weekday_last::weekday_last(const date::weekday& wd) NOEXCEPT : wd_(wd) {}
 
 CONSTCD11
-inline
-bool
-operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT
-{
-    return x.weekday() == y.weekday();
+inline bool operator==(const weekday_last& x, const weekday_last& y) NOEXCEPT {
+  return x.weekday() == y.weekday();
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const weekday_last& x, const weekday_last& y) NOEXCEPT {
+  return !(x == y);
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const weekday_last& wdl)
-{
-    return os << wdl.weekday() << "[last]";
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const weekday_last& wdl) {
+  return os << wdl.weekday() << "[last]";
 }
 
 CONSTCD11
-inline
-weekday_last
-weekday::operator[](last_spec) const NOEXCEPT
-{
-    return weekday_last{*this};
+inline weekday_last weekday::operator[](last_spec) const NOEXCEPT {
+  return weekday_last{*this};
 }
 
 // year_month
 
 CONSTCD11
-inline
-year_month::year_month(const date::year& y, const date::month& m) NOEXCEPT
-    : y_(y)
-    , m_(m)
-    {}
+inline year_month::year_month(const date::year& y, const date::month& m) NOEXCEPT
+    : y_(y),
+      m_(m) {}
 
-CONSTCD11 inline year year_month::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month::month() const NOEXCEPT {return m_;}
-CONSTCD11 inline bool year_month::ok() const NOEXCEPT {return y_.ok() && m_.ok();}
+CONSTCD11 inline year year_month::year() const NOEXCEPT { return y_; }
+CONSTCD11 inline month year_month::month() const NOEXCEPT { return m_; }
+CONSTCD11 inline bool year_month::ok() const NOEXCEPT { return y_.ok() && m_.ok(); }
 
-template<class>
-CONSTCD14
-inline
-year_month&
-year_month::operator+=(const months& dm) NOEXCEPT
-{
-    *this = *this + dm;
-    return *this;
+template <class>
+CONSTCD14 inline year_month& year_month::operator+=(const months& dm) NOEXCEPT {
+  *this = *this + dm;
+  return *this;
 }
 
-template<class>
-CONSTCD14
-inline
-year_month&
-year_month::operator-=(const months& dm) NOEXCEPT
-{
-    *this = *this - dm;
-    return *this;
+template <class>
+CONSTCD14 inline year_month& year_month::operator-=(const months& dm) NOEXCEPT {
+  *this = *this - dm;
+  return *this;
 }
 
 CONSTCD14
-inline
-year_month&
-year_month::operator+=(const years& dy) NOEXCEPT
-{
-    *this = *this + dy;
-    return *this;
+inline year_month& year_month::operator+=(const years& dy) NOEXCEPT {
+  *this = *this + dy;
+  return *this;
 }
 
 CONSTCD14
-inline
-year_month&
-year_month::operator-=(const years& dy) NOEXCEPT
-{
-    *this = *this - dy;
-    return *this;
+inline year_month& year_month::operator-=(const years& dy) NOEXCEPT {
+  // Assign the result of the non-member operator- and return the updated object.
+  return *this = *this - dy;
 }
 
 CONSTCD11
-inline
-bool
-operator==(const year_month& x, const year_month& y) NOEXCEPT
-{
-    return x.year() == y.year() && x.month() == y.month();
+inline bool operator==(const year_month& x, const year_month& y) NOEXCEPT {
+  return x.year() == y.year() && x.month() == y.month();  // field-wise equality
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const year_month& x, const year_month& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const year_month& x, const year_month& y) NOEXCEPT {
+  return !(x == y);  // exact negation of operator==
 }
 
 CONSTCD11
-inline
-bool
-operator<(const year_month& x, const year_month& y) NOEXCEPT
-{
-    return x.year() < y.year() ? true
-        : (x.year() > y.year() ? false
-        : (x.month() < y.month()));
+inline bool operator<(const year_month& x, const year_month& y) NOEXCEPT {
+  // Lexicographic order: the year decides first, the month breaks ties.
+  return x.year() < y.year() || (!(x.year() > y.year()) && x.month() < y.month());
 }
 
 CONSTCD11
-inline
-bool
-operator>(const year_month& x, const year_month& y) NOEXCEPT
-{
-    return y < x;
-}
+inline bool operator>(const year_month& x, const year_month& y) NOEXCEPT { return y < x; }
 
 CONSTCD11
-inline
-bool
-operator<=(const year_month& x, const year_month& y) NOEXCEPT
-{
-    return !(y < x);
+inline bool operator<=(const year_month& x, const year_month& y) NOEXCEPT {
+  return !(y < x);  // i.e. y is not less than x
 }
 
 CONSTCD11
-inline
-bool
-operator>=(const year_month& x, const year_month& y) NOEXCEPT
-{
-    return !(x < y);
+inline bool operator>=(const year_month& x, const year_month& y) NOEXCEPT {
+  return !(x < y);  // i.e. x is not less than y
 }
 
-template<class>
-CONSTCD14
-inline
-year_month
-operator+(const year_month& ym, const months& dm) NOEXCEPT
-{
-    auto dmi = static_cast<int>(static_cast<unsigned>(ym.month())) - 1 + dm.count();
-    auto dy = (dmi >= 0 ? dmi : dmi-11) / 12;
-    dmi = dmi - dy * 12 + 1;
-    return (ym.year() + years(dy)) / month(static_cast<unsigned>(dmi));
+template <class>
+CONSTCD14 inline year_month operator+(const year_month& ym, const months& dm) NOEXCEPT {
+  auto dmi = static_cast<int>(static_cast<unsigned>(ym.month())) - 1 + dm.count();
+  auto dy = (dmi >= 0 ? dmi : dmi - 11) / 12;  // floor(dmi / 12), also for negative dmi
+  dmi = dmi - dy * 12 + 1;                     // back to a 1-based month in [1, 12]
+  return (ym.year() + years(dy)) / month(static_cast<unsigned>(dmi));
 }
 
-template<class>
-CONSTCD14
-inline
-year_month
-operator+(const months& dm, const year_month& ym) NOEXCEPT
-{
-    return ym + dm;
+template <class>
+CONSTCD14 inline year_month operator+(const months& dm, const year_month& ym) NOEXCEPT {
+  return ym + dm;  // forwards to ym + dm with the operands swapped
 }
 
-template<class>
-CONSTCD14
-inline
-year_month
-operator-(const year_month& ym, const months& dm) NOEXCEPT
-{
-    return ym + -dm;
+template <class>
+CONSTCD14 inline year_month operator-(const year_month& ym, const months& dm) NOEXCEPT {
+  return ym + -dm;  // subtraction expressed as adding the negated duration
 }
 
 CONSTCD11
-inline
-months
-operator-(const year_month& x, const year_month& y) NOEXCEPT
-{
-    return (x.year() - y.year()) +
-            months(static_cast<unsigned>(x.month()) - static_cast<unsigned>(y.month()));
+inline months operator-(const year_month& x, const year_month& y) NOEXCEPT {
+  return (x.year() - y.year()) +  // NOTE(review): unsigned month diff wraps when negative
+         months(static_cast<unsigned>(x.month()) - static_cast<unsigned>(y.month()));
 }
 
 CONSTCD11
-inline
-year_month
-operator+(const year_month& ym, const years& dy) NOEXCEPT
-{
-    return (ym.year() + dy) / ym.month();
+inline year_month operator+(const year_month& ym, const years& dy) NOEXCEPT {
+  return (ym.year() + dy) / ym.month();  // only the year moves; the month is reused
 }
 
 CONSTCD11
-inline
-year_month
-operator+(const years& dy, const year_month& ym) NOEXCEPT
-{
-    return ym + dy;
+inline year_month operator+(const years& dy, const year_month& ym) NOEXCEPT {
+  return ym + dy;  // forwards to ym + dy with the operands swapped
 }
 
 CONSTCD11
-inline
-year_month
-operator-(const year_month& ym, const years& dy) NOEXCEPT
-{
-    return ym + -dy;
+inline year_month operator-(const year_month& ym, const years& dy) NOEXCEPT {
+  return ym + -dy;  // subtraction expressed as adding the negated duration
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month& ym)
-{
-    return os << ym.year() << '/' << ym.month();
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const year_month& ym) {
+  return os << ym.year() << '/' << ym.month();  // writes year, '/', month
 }
 
 // month_day
 
 CONSTCD11
-inline
-month_day::month_day(const date::month& m, const date::day& d) NOEXCEPT
-    : m_(m)
-    , d_(d)
-    {}
+inline month_day::month_day(const date::month& m, const date::day& d) NOEXCEPT
+    : m_(m), d_(d) {}  // plain member-wise copy; nothing is validated at construction
 
-CONSTCD11 inline date::month month_day::month() const NOEXCEPT {return m_;}
-CONSTCD11 inline date::day month_day::day() const NOEXCEPT {return d_;}
+CONSTCD11 inline date::month month_day::month() const NOEXCEPT { return m_; }
+CONSTCD11 inline date::day month_day::day() const NOEXCEPT { return d_; }
 
 CONSTCD14
-inline
-bool
-month_day::ok() const NOEXCEPT
-{
-    CONSTDATA date::day d[] =
-    {
-        date::day(31), date::day(29), date::day(31),
-        date::day(30), date::day(31), date::day(30),
-        date::day(31), date::day(31), date::day(30),
-        date::day(31), date::day(30), date::day(31)
-    };
-    return m_.ok() && date::day{1} <= d_ && d_ <= d[static_cast<unsigned>(m_)-1];
+inline bool month_day::ok() const NOEXCEPT {  // Feb is capped at 29: no year to consult
+  CONSTDATA date::day d[] = {date::day(31), date::day(29), date::day(31), date::day(30),
+                             date::day(31), date::day(30), date::day(31), date::day(31),
+                             date::day(30), date::day(31), date::day(30), date::day(31)};
+  return m_.ok() && date::day{1} <= d_ && d_ <= d[static_cast<unsigned>(m_) - 1];
 }
 
 CONSTCD11
-inline
-bool
-operator==(const month_day& x, const month_day& y) NOEXCEPT
-{
-    return x.month() == y.month() && x.day() == y.day();
+inline bool operator==(const month_day& x, const month_day& y) NOEXCEPT {
+  return x.month() == y.month() && x.day() == y.day();  // field-wise equality
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const month_day& x, const month_day& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const month_day& x, const month_day& y) NOEXCEPT {
+  return !(x == y);  // exact negation of operator==
 }
 
 CONSTCD11
-inline
-bool
-operator<(const month_day& x, const month_day& y) NOEXCEPT
-{
-    return x.month() < y.month() ? true
-        : (x.month() > y.month() ? false
-        : (x.day() < y.day()));
+inline bool operator<(const month_day& x, const month_day& y) NOEXCEPT {
+  // Lexicographic order: the month decides first, the day breaks ties.
+  return x.month() < y.month() || (!(x.month() > y.month()) && x.day() < y.day());
 }
 
 CONSTCD11
-inline
-bool
-operator>(const month_day& x, const month_day& y) NOEXCEPT
-{
-    return y < x;
-}
+inline bool operator>(const month_day& x, const month_day& y) NOEXCEPT { return y < x; }
 
 CONSTCD11
-inline
-bool
-operator<=(const month_day& x, const month_day& y) NOEXCEPT
-{
-    return !(y < x);
+inline bool operator<=(const month_day& x, const month_day& y) NOEXCEPT {
+  return !(y < x);  // i.e. y is not less than x
 }
 
 CONSTCD11
-inline
-bool
-operator>=(const month_day& x, const month_day& y) NOEXCEPT
-{
-    return !(x < y);
+inline bool operator>=(const month_day& x, const month_day& y) NOEXCEPT {
+  return !(x < y);  // i.e. x is not less than y
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_day& md)
-{
-    return os << md.month() << '/' << md.day();
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const month_day& md) {
+  return os << md.month() << '/' << md.day();  // writes month, '/', day
 }
 
 // month_day_last
 
-CONSTCD11 inline month month_day_last::month() const NOEXCEPT {return m_;}
-CONSTCD11 inline bool month_day_last::ok() const NOEXCEPT {return m_.ok();}
+CONSTCD11 inline month month_day_last::month() const NOEXCEPT { return m_; }
+CONSTCD11 inline bool month_day_last::ok() const NOEXCEPT { return m_.ok(); }
 CONSTCD11 inline month_day_last::month_day_last(const date::month& m) NOEXCEPT : m_(m) {}
 
 CONSTCD11
-inline
-bool
-operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
-    return x.month() == y.month();
+inline bool operator==(const month_day_last& x, const month_day_last& y) NOEXCEPT {
+  return x.month() == y.month();  // the month is the only state compared
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const month_day_last& x, const month_day_last& y) NOEXCEPT {
+  return !(x == y);  // exact negation of operator==
 }
 
 CONSTCD11
-inline
-bool
-operator<(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
-    return x.month() < y.month();
+inline bool operator<(const month_day_last& x, const month_day_last& y) NOEXCEPT {
+  return x.month() < y.month();  // ordered by month alone
 }
 
 CONSTCD11
-inline
-bool
-operator>(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
-    return y < x;
+inline bool operator>(const month_day_last& x, const month_day_last& y) NOEXCEPT {
+  return y < x;  // operator< with the operands swapped
 }
 
 CONSTCD11
-inline
-bool
-operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
-    return !(y < x);
+inline bool operator<=(const month_day_last& x, const month_day_last& y) NOEXCEPT {
+  return !(y < x);  // i.e. y is not less than x
 }
 
 CONSTCD11
-inline
-bool
-operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT
-{
-    return !(x < y);
+inline bool operator>=(const month_day_last& x, const month_day_last& y) NOEXCEPT {
+  return !(x < y);  // i.e. x is not less than y
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_day_last& mdl)
-{
-    return os << mdl.month() << "/last";
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const month_day_last& mdl) {
+  return os << mdl.month() << "/last";  // writes the month followed by "/last"
 }
 
 // month_weekday
 
 CONSTCD11
-inline
-month_weekday::month_weekday(const date::month& m,
-                             const date::weekday_indexed& wdi) NOEXCEPT
-    : m_(m)
-    , wdi_(wdi)
-    {}
+inline month_weekday::month_weekday(const date::month& m,
+                                    const date::weekday_indexed& wdi) NOEXCEPT
+    : m_(m),  // both parts are copied as-is; ok() is where they get checked
+      wdi_(wdi) {}
 
-CONSTCD11 inline month month_weekday::month() const NOEXCEPT {return m_;}
+CONSTCD11 inline month month_weekday::month() const NOEXCEPT { return m_; }
 
 CONSTCD11
-inline
-weekday_indexed
-month_weekday::weekday_indexed() const NOEXCEPT
-{
-    return wdi_;
-}
+inline weekday_indexed month_weekday::weekday_indexed() const NOEXCEPT { return wdi_; }
 
 CONSTCD11
-inline
-bool
-month_weekday::ok() const NOEXCEPT
-{
-    return m_.ok() && wdi_.ok();
-}
+inline bool month_weekday::ok() const NOEXCEPT { return m_.ok() && wdi_.ok(); }
 
 CONSTCD11
-inline
-bool
-operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT
-{
-    return x.month() == y.month() && x.weekday_indexed() == y.weekday_indexed();
+inline bool operator==(const month_weekday& x, const month_weekday& y) NOEXCEPT {
+  return x.month() == y.month() && x.weekday_indexed() == y.weekday_indexed();
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const month_weekday& x, const month_weekday& y) NOEXCEPT {
+  return !(x == y);  // exact negation of operator==
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday& mwd)
-{
-    return os << mwd.month() << '/' << mwd.weekday_indexed();
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const month_weekday& mwd) {
+  return os << mwd.month() << '/' << mwd.weekday_indexed();  // month, '/', weekday[i]
 }
 
 // month_weekday_last
 
 CONSTCD11
-inline
-month_weekday_last::month_weekday_last(const date::month& m,
-                                       const date::weekday_last& wdl) NOEXCEPT
-    : m_(m)
-    , wdl_(wdl)
-    {}
+inline month_weekday_last::month_weekday_last(const date::month& m,
+                                              const date::weekday_last& wdl) NOEXCEPT
+    : m_(m),  // both parts are copied as-is; ok() is where they get checked
+      wdl_(wdl) {}
 
-CONSTCD11 inline month month_weekday_last::month() const NOEXCEPT {return m_;}
+CONSTCD11 inline month month_weekday_last::month() const NOEXCEPT { return m_; }
 
 CONSTCD11
-inline
-weekday_last
-month_weekday_last::weekday_last() const NOEXCEPT
-{
-    return wdl_;
-}
+inline weekday_last month_weekday_last::weekday_last() const NOEXCEPT { return wdl_; }
 
 CONSTCD11
-inline
-bool
-month_weekday_last::ok() const NOEXCEPT
-{
-    return m_.ok() && wdl_.ok();
-}
+inline bool month_weekday_last::ok() const NOEXCEPT { return m_.ok() && wdl_.ok(); }
 
 CONSTCD11
-inline
-bool
-operator==(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT
-{
-    return x.month() == y.month() && x.weekday_last() == y.weekday_last();
+inline bool operator==(const month_weekday_last& x,
+                       const month_weekday_last& y) NOEXCEPT {  // field-wise equality
+  return x.month() == y.month() && x.weekday_last() == y.weekday_last();
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const month_weekday_last& x, const month_weekday_last& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const month_weekday_last& x,
+                       const month_weekday_last& y) NOEXCEPT {
+  return !(x == y);  // exact negation of operator==
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const month_weekday_last& mwdl)
-{
-    return os << mwdl.month() << '/' << mwdl.weekday_last();
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const month_weekday_last& mwdl) {
+  return os << mwdl.month() << '/' << mwdl.weekday_last();  // month, '/', weekday[last]
 }
 
 // year_month_day_last
 
 CONSTCD11
-inline
-year_month_day_last::year_month_day_last(const date::year& y,
-                                         const date::month_day_last& mdl) NOEXCEPT
-    : y_(y)
-    , mdl_(mdl)
-    {}
+inline year_month_day_last::year_month_day_last(const date::year& y,
+                                                const date::month_day_last& mdl) NOEXCEPT
+    : y_(y),  // both parts are copied as-is; ok() is where they get checked
+      mdl_(mdl) {}
 
-template<class>
-CONSTCD14
-inline
-year_month_day_last&
-year_month_day_last::operator+=(const months& m) NOEXCEPT
-{
-    *this = *this + m;
-    return *this;
+template <class>
+CONSTCD14 inline year_month_day_last& year_month_day_last::operator+=(
+    const months& m) NOEXCEPT {
+  // Compute the shifted value via the free operator+, assign it, and return *this.
+  return *this = *this + m;
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_day_last&
-year_month_day_last::operator-=(const months& m) NOEXCEPT
-{
-    *this = *this - m;
-    return *this;
+template <class>
+CONSTCD14 inline year_month_day_last& year_month_day_last::operator-=(
+    const months& m) NOEXCEPT {
+  // Compute the shifted value via the free operator-, assign it, and return *this.
+  return *this = *this - m;
 }
 
 CONSTCD14
-inline
-year_month_day_last&
-year_month_day_last::operator+=(const years& y) NOEXCEPT
-{
-    *this = *this + y;
-    return *this;
+inline year_month_day_last& year_month_day_last::operator+=(const years& y) NOEXCEPT {
+  // Assign the result of the free operator+ and hand back the updated object.
+  return *this = *this + y;
 }
 
 CONSTCD14
-inline
-year_month_day_last&
-year_month_day_last::operator-=(const years& y) NOEXCEPT
-{
-    *this = *this - y;
-    return *this;
+inline year_month_day_last& year_month_day_last::operator-=(const years& y) NOEXCEPT {
+  // Assign the result of the free operator- and hand back the updated object.
+  return *this = *this - y;
 }
 
-CONSTCD11 inline year year_month_day_last::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month_day_last::month() const NOEXCEPT {return mdl_.month();}
+CONSTCD11 inline year year_month_day_last::year() const NOEXCEPT { return y_; }
+CONSTCD11 inline month year_month_day_last::month() const NOEXCEPT {
+  return mdl_.month();  // the month lives inside the stored month_day_last
+}
 
 CONSTCD11
-inline
-month_day_last
-year_month_day_last::month_day_last() const NOEXCEPT
-{
-    return mdl_;
+inline month_day_last year_month_day_last::month_day_last() const NOEXCEPT {
+  return mdl_;  // the stored month_day_last component, returned by value
 }
 
 CONSTCD14
-inline
-day
-year_month_day_last::day() const NOEXCEPT
-{
-    CONSTDATA date::day d[] =
-    {
-        date::day(31), date::day(28), date::day(31),
-        date::day(30), date::day(31), date::day(30),
-        date::day(31), date::day(31), date::day(30),
-        date::day(31), date::day(30), date::day(31)
-    };
-    return (month() != February || !y_.is_leap()) && mdl_.ok() ?
-        d[static_cast<unsigned>(month()) - 1] : date::day{29};
+inline day year_month_day_last::day() const NOEXCEPT {  // last day of month() in year y_
+  CONSTDATA date::day d[] = {date::day(31), date::day(28), date::day(31), date::day(30),
+                             date::day(31), date::day(30), date::day(31), date::day(31),
+                             date::day(30), date::day(31), date::day(30), date::day(31)};
+  return (month() != February || !y_.is_leap()) && mdl_.ok()
+             ? d[static_cast<unsigned>(month()) - 1]
+             : date::day{29};  // leap-year February, or fallback when !mdl_.ok()
 }
 
 CONSTCD14
-inline
-year_month_day_last::operator sys_days() const NOEXCEPT
-{
-    return sys_days(year()/month()/day());
+inline year_month_day_last::operator sys_days() const NOEXCEPT {
+  return sys_days(year() / month() / day());  // goes through a concrete y/m/d first
 }
 
 CONSTCD14
-inline
-year_month_day_last::operator local_days() const NOEXCEPT
-{
-    return local_days(year()/month()/day());
+inline year_month_day_last::operator local_days() const NOEXCEPT {
+  return local_days(year() / month() / day());  // goes through a concrete y/m/d first
 }
 
 CONSTCD11
-inline
-bool
-year_month_day_last::ok() const NOEXCEPT
-{
-    return y_.ok() && mdl_.ok();
-}
+inline bool year_month_day_last::ok() const NOEXCEPT { return y_.ok() && mdl_.ok(); }
 
 CONSTCD11
-inline
-bool
-operator==(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
-    return x.year() == y.year() && x.month_day_last() == y.month_day_last();
+inline bool operator==(const year_month_day_last& x,
+                       const year_month_day_last& y) NOEXCEPT {  // field-wise equality
+  return x.year() == y.year() && x.month_day_last() == y.month_day_last();
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const year_month_day_last& x,
+                       const year_month_day_last& y) NOEXCEPT {
+  return !(x == y);  // exact negation of operator==
 }
 
 CONSTCD11
-inline
-bool
-operator<(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
-    return x.year() < y.year() ? true
-        : (x.year() > y.year() ? false
-        : (x.month_day_last() < y.month_day_last()));
+inline bool operator<(const year_month_day_last& x,
+                      const year_month_day_last& y) NOEXCEPT {
+  // The year decides first; equal years fall through to the month_day_last comparison.
+  return x.year() < y.year() ||
+         (!(x.year() > y.year()) && x.month_day_last() < y.month_day_last());
 }
 
 CONSTCD11
-inline
-bool
-operator>(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
-    return y < x;
+inline bool operator>(const year_month_day_last& x,
+                      const year_month_day_last& y) NOEXCEPT {
+  return y < x;  // operator< with the operands swapped
 }
 
 CONSTCD11
-inline
-bool
-operator<=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
-    return !(y < x);
+inline bool operator<=(const year_month_day_last& x,
+                       const year_month_day_last& y) NOEXCEPT {
+  return !(y < x);  // i.e. y is not less than x
 }
 
 CONSTCD11
-inline
-bool
-operator>=(const year_month_day_last& x, const year_month_day_last& y) NOEXCEPT
-{
-    return !(x < y);
+inline bool operator>=(const year_month_day_last& x,
+                       const year_month_day_last& y) NOEXCEPT {
+  return !(x < y);  // i.e. x is not less than y
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day_last& ymdl)
-{
-    return os << ymdl.year() << '/' << ymdl.month_day_last();
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const year_month_day_last& ymdl) {
+  return os << ymdl.year() << '/' << ymdl.month_day_last();  // year, '/', month/last
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_day_last
-operator+(const year_month_day_last& ymdl, const months& dm) NOEXCEPT
-{
-    return (ymdl.year() / ymdl.month() + dm) / last;
+template <class>
+CONSTCD14 inline year_month_day_last operator+(const year_month_day_last& ymdl,
+                                               const months& dm) NOEXCEPT {
+  return (ymdl.year() / ymdl.month() + dm) / last;  // shift year/month, re-attach last
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_day_last
-operator+(const months& dm, const year_month_day_last& ymdl) NOEXCEPT
-{
-    return ymdl + dm;
+template <class>
+CONSTCD14 inline year_month_day_last operator+(const months& dm,
+                                               const year_month_day_last& ymdl) NOEXCEPT {
+  return ymdl + dm;  // forwards to ymdl + dm with the operands swapped
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_day_last
-operator-(const year_month_day_last& ymdl, const months& dm) NOEXCEPT
-{
-    return ymdl + (-dm);
+template <class>
+CONSTCD14 inline year_month_day_last operator-(const year_month_day_last& ymdl,
+                                               const months& dm) NOEXCEPT {
+  return ymdl + (-dm);  // subtraction expressed as adding the negated duration
 }
 
 CONSTCD11
-inline
-year_month_day_last
-operator+(const year_month_day_last& ymdl, const years& dy) NOEXCEPT
-{
-    return {ymdl.year()+dy, ymdl.month_day_last()};
+inline year_month_day_last operator+(const year_month_day_last& ymdl,
+                                     const years& dy) NOEXCEPT {
+  return {ymdl.year() + dy, ymdl.month_day_last()};  // only the year changes
 }
 
 CONSTCD11
-inline
-year_month_day_last
-operator+(const years& dy, const year_month_day_last& ymdl) NOEXCEPT
-{
-    return ymdl + dy;
+inline year_month_day_last operator+(const years& dy,
+                                     const year_month_day_last& ymdl) NOEXCEPT {
+  return ymdl + dy;  // forwards to ymdl + dy with the operands swapped
 }
 
 CONSTCD11
-inline
-year_month_day_last
-operator-(const year_month_day_last& ymdl, const years& dy) NOEXCEPT
-{
-    return ymdl + (-dy);
+inline year_month_day_last operator-(const year_month_day_last& ymdl,
+                                     const years& dy) NOEXCEPT {
+  return ymdl + (-dy);  // subtraction expressed as adding the negated duration
 }
 
 // year_month_day
 
 CONSTCD11
-inline
-year_month_day::year_month_day(const date::year& y, const date::month& m,
-                               const date::day& d) NOEXCEPT
-    : y_(y)
-    , m_(m)
-    , d_(d)
-    {}
+inline year_month_day::year_month_day(const date::year& y, const date::month& m,
+                                      const date::day& d) NOEXCEPT
+    // Member-wise copy of the three fields; no validation happens here.
+    : y_(y), m_(m), d_(d) {}
 
 CONSTCD14
-inline
-year_month_day::year_month_day(const year_month_day_last& ymdl) NOEXCEPT
-    : y_(ymdl.year())
-    , m_(ymdl.month())
-    , d_(ymdl.day())
-    {}
+inline year_month_day::year_month_day(const year_month_day_last& ymdl) NOEXCEPT
+    : y_(ymdl.year()),
+      m_(ymdl.month()),
+      d_(ymdl.day()) {}  // ymdl.day() supplies the concrete last day of that month
 
 CONSTCD14
-inline
-year_month_day::year_month_day(sys_days dp) NOEXCEPT
-    : year_month_day(from_days(dp.time_since_epoch()))
-    {}
+inline year_month_day::year_month_day(sys_days dp) NOEXCEPT
+    : year_month_day(from_days(dp.time_since_epoch())) {}  // delegates to from_days()
 
 CONSTCD14
-inline
-year_month_day::year_month_day(local_days dp) NOEXCEPT
-    : year_month_day(from_days(dp.time_since_epoch()))
-    {}
+inline year_month_day::year_month_day(local_days dp) NOEXCEPT
+    : year_month_day(from_days(dp.time_since_epoch())) {}  // delegates to from_days()
 
-CONSTCD11 inline year year_month_day::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month_day::month() const NOEXCEPT {return m_;}
-CONSTCD11 inline day year_month_day::day() const NOEXCEPT {return d_;}
+CONSTCD11 inline year year_month_day::year() const NOEXCEPT { return y_; }
+CONSTCD11 inline month year_month_day::month() const NOEXCEPT { return m_; }
+CONSTCD11 inline day year_month_day::day() const NOEXCEPT { return d_; }
 
-template<class>
-CONSTCD14
-inline
-year_month_day&
-year_month_day::operator+=(const months& m) NOEXCEPT
-{
-    *this = *this + m;
-    return *this;
+template <class>
+CONSTCD14 inline year_month_day& year_month_day::operator+=(const months& m) NOEXCEPT {
+  // Let the free operator+ produce the shifted date, then store and return it.
+  return *this = *this + m;
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_day&
-year_month_day::operator-=(const months& m) NOEXCEPT
-{
-    *this = *this - m;
-    return *this;
+template <class>
+CONSTCD14 inline year_month_day& year_month_day::operator-=(const months& m) NOEXCEPT {
+  // Let the free operator- produce the shifted date, then store and return it.
+  return *this = *this - m;
 }
 
 CONSTCD14
-inline
-year_month_day&
-year_month_day::operator+=(const years& y) NOEXCEPT
-{
-    *this = *this + y;
-    return *this;
+inline year_month_day& year_month_day::operator+=(const years& y) NOEXCEPT {
+  // Store the result of the free operator+ and return the updated object.
+  return *this = *this + y;
 }
 
 CONSTCD14
-inline
-year_month_day&
-year_month_day::operator-=(const years& y) NOEXCEPT
-{
-    *this = *this - y;
-    return *this;
+inline year_month_day& year_month_day::operator-=(const years& y) NOEXCEPT {
+  // Store the result of the free operator- and return the updated object.
+  return *this = *this - y;
 }
 
 CONSTCD14
-inline
-days
-year_month_day::to_days() const NOEXCEPT
-{
-    static_assert(std::numeric_limits<unsigned>::digits >= 18,
-             "This algorithm has not been ported to a 16 bit unsigned integer");
-    static_assert(std::numeric_limits<int>::digits >= 20,
-             "This algorithm has not been ported to a 16 bit signed integer");
-    auto const y = static_cast<int>(y_) - (m_ <= February);
-    auto const m = static_cast<unsigned>(m_);
-    auto const d = static_cast<unsigned>(d_);
-    auto const era = (y >= 0 ? y : y-399) / 400;
-    auto const yoe = static_cast<unsigned>(y - era * 400);       // [0, 399]
-    auto const doy = (153*(m > 2 ? m-3 : m+9) + 2)/5 + d-1;      // [0, 365]
-    auto const doe = yoe * 365 + yoe/4 - yoe/100 + doy;          // [0, 146096]
-    return days{era * 146097 + static_cast<int>(doe) - 719468};
+inline days year_month_day::to_days() const NOEXCEPT {
+  static_assert(std::numeric_limits<unsigned>::digits >= 18,
+                "This algorithm has not been ported to a 16 bit unsigned integer");
+  static_assert(std::numeric_limits<int>::digits >= 20,
+                "This algorithm has not been ported to a 16 bit signed integer");
+  auto const y = static_cast<int>(y_) - (m_ <= February);  // Jan/Feb belong to year - 1
+  auto const m = static_cast<unsigned>(m_);
+  auto const d = static_cast<unsigned>(d_);
+  auto const era = (y >= 0 ? y : y - 399) / 400;                     // floor(y / 400)
+  auto const yoe = static_cast<unsigned>(y - era * 400);             // [0, 399]
+  auto const doy = (153 * (m > 2 ? m - 3 : m + 9) + 2) / 5 + d - 1;  // [0, 365]
+  auto const doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;            // [0, 146096]
+  return days{era * 146097 + static_cast<int>(doe) - 719468};
 }
 
 CONSTCD14
-inline
-year_month_day::operator sys_days() const NOEXCEPT
-{
-    return sys_days{to_days()};
-}
+inline year_month_day::operator sys_days() const NOEXCEPT { return sys_days{to_days()}; }
 
 CONSTCD14
-inline
-year_month_day::operator local_days() const NOEXCEPT
-{
-    return local_days{to_days()};
+inline year_month_day::operator local_days() const NOEXCEPT {
+  return local_days{to_days()};  // same day count as to_days(), typed as local_days
 }
 
 CONSTCD14
-inline
-bool
-year_month_day::ok() const NOEXCEPT
-{
-    if (!(y_.ok() && m_.ok()))
-        return false;
-    return date::day{1} <= d_ && d_ <= (y_ / m_ / last).day();
+inline bool year_month_day::ok() const NOEXCEPT {
+  if (!(y_.ok() && m_.ok())) return false;  // bail out before computing a last day
+  return date::day{1} <= d_ && d_ <= (y_ / m_ / last).day();  // 1..last day of y_/m_
 }
 
 CONSTCD11
-inline
-bool
-operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
-    return x.year() == y.year() && x.month() == y.month() && x.day() == y.day();
+inline bool operator==(const year_month_day& x, const year_month_day& y) NOEXCEPT {
+  return x.year() == y.year() && x.month() == y.month() && x.day() == y.day();
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const year_month_day& x, const year_month_day& y) NOEXCEPT {
+  return !(x == y);  // exact negation of operator==
 }
 
 CONSTCD11
-inline
-bool
-operator<(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
-    return x.year() < y.year() ? true
-        : (x.year() > y.year() ? false
-        : (x.month() < y.month() ? true
-        : (x.month() > y.month() ? false
-        : (x.day() < y.day()))));
+inline bool operator<(const year_month_day& x, const year_month_day& y) NOEXCEPT {
+  // Lexicographic comparison over (year, month, day). Each "greater" test is kept so the
+  // sequence of operator calls matches a nested conditional written out by hand.
+  return x.year() < y.year() ||
+         (!(x.year() > y.year()) &&
+          (x.month() < y.month() ||
+           (!(x.month() > y.month()) &&
+            x.day() < y.day())));
 }
 
 CONSTCD11
-inline
-bool
-operator>(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
-    return y < x;
+inline bool operator>(const year_month_day& x, const year_month_day& y) NOEXCEPT {
+  return y < x;  // operator< with the operands swapped
 }
 
 CONSTCD11
-inline
-bool
-operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
-    return !(y < x);
+inline bool operator<=(const year_month_day& x, const year_month_day& y) NOEXCEPT {
+  return !(y < x);  // i.e. y is not less than x
 }
 
 CONSTCD11
-inline
-bool
-operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT
-{
-    return !(x < y);
+inline bool operator>=(const year_month_day& x, const year_month_day& y) NOEXCEPT {
+  return !(x < y);  // i.e. x is not less than y
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_day& ymd)
-{
-    detail::save_ostream<CharT, Traits> _(os);
-    os.fill('0');
-    os.flags(std::ios::dec | std::ios::right);
-    os << ymd.year() << '-';
-    os.width(2);
-    os << static_cast<unsigned>(ymd.month()) << '-';
-    os << ymd.day();
-    if (!ymd.ok())
-        os << " is not a valid date";
-    return os;
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const year_month_day& ymd) {
+  detail::save_ostream<CharT, Traits> _(os);  // presumably restores os on exit; confirm
+  os.fill('0');  // zero padding for the numeric fields
+  os.flags(std::ios::dec | std::ios::right);
+  os << ymd.year() << '-';
+  os.width(2);  // two columns for the month number that follows
+  os << static_cast<unsigned>(ymd.month()) << '-';
+  os << ymd.day();
+  if (!ymd.ok()) os << " is not a valid date";  // flag invalid dates in the output
+  return os;
 }
 
 CONSTCD14
-inline
-year_month_day
-year_month_day::from_days(days dp) NOEXCEPT
-{
-    static_assert(std::numeric_limits<unsigned>::digits >= 18,
-             "This algorithm has not been ported to a 16 bit unsigned integer");
-    static_assert(std::numeric_limits<int>::digits >= 20,
-             "This algorithm has not been ported to a 16 bit signed integer");
-    auto const z = dp.count() + 719468;
-    auto const era = (z >= 0 ? z : z - 146096) / 146097;
-    auto const doe = static_cast<unsigned>(z - era * 146097);          // [0, 146096]
-    auto const yoe = (doe - doe/1460 + doe/36524 - doe/146096) / 365;  // [0, 399]
-    auto const y = static_cast<days::rep>(yoe) + era * 400;
-    auto const doy = doe - (365*yoe + yoe/4 - yoe/100);                // [0, 365]
-    auto const mp = (5*doy + 2)/153;                                   // [0, 11]
-    auto const d = doy - (153*mp+2)/5 + 1;                             // [1, 31]
-    auto const m = mp < 10 ? mp+3 : mp-9;                              // [1, 12]
-    return year_month_day{date::year{y + (m <= 2)}, date::month(m), date::day(d)};
-}
-
-template<class>
-CONSTCD14
-inline
-year_month_day
-operator+(const year_month_day& ymd, const months& dm) NOEXCEPT
-{
-    return (ymd.year() / ymd.month() + dm) / ymd.day();
+inline year_month_day year_month_day::from_days(days dp) NOEXCEPT {
+  static_assert(std::numeric_limits<unsigned>::digits >= 18,
+                "This algorithm has not been ported to a 16 bit unsigned integer");
+  static_assert(std::numeric_limits<int>::digits >= 20,
+                "This algorithm has not been ported to a 16 bit signed integer");
+  auto const z = dp.count() + 719468;
+  auto const era = (z >= 0 ? z : z - 146096) / 146097;
+  auto const doe = static_cast<unsigned>(z - era * 146097);                // [0, 146096]
+  auto const yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;  // [0, 399]
+  auto const y = static_cast<days::rep>(yoe) + era * 400;
+  auto const doy = doe - (365 * yoe + yoe / 4 - yoe / 100);  // [0, 365]
+  auto const mp = (5 * doy + 2) / 153;                       // [0, 11]
+  auto const d = doy - (153 * mp + 2) / 5 + 1;               // [1, 31]
+  auto const m = mp < 10 ? mp + 3 : mp - 9;                  // [1, 12]
+  return year_month_day{date::year{y + (m <= 2)}, date::month(m), date::day(d)};
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_day
-operator+(const months& dm, const year_month_day& ymd) NOEXCEPT
-{
-    return ymd + dm;
+template <class>
+CONSTCD14 inline year_month_day operator+(const year_month_day& ymd,
+                                          const months& dm) NOEXCEPT {
+  return (ymd.year() / ymd.month() + dm) / ymd.day();
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_day
-operator-(const year_month_day& ymd, const months& dm) NOEXCEPT
-{
-    return ymd + (-dm);
+template <class>
+CONSTCD14 inline year_month_day operator+(const months& dm,
+                                          const year_month_day& ymd) NOEXCEPT {
+  return ymd + dm;
+}
+
+template <class>
+CONSTCD14 inline year_month_day operator-(const year_month_day& ymd,
+                                          const months& dm) NOEXCEPT {
+  return ymd + (-dm);
 }
 
 CONSTCD11
-inline
-year_month_day
-operator+(const year_month_day& ymd, const years& dy) NOEXCEPT
-{
-    return (ymd.year() + dy) / ymd.month() / ymd.day();
+inline year_month_day operator+(const year_month_day& ymd, const years& dy) NOEXCEPT {
+  return (ymd.year() + dy) / ymd.month() / ymd.day();
 }
 
 CONSTCD11
-inline
-year_month_day
-operator+(const years& dy, const year_month_day& ymd) NOEXCEPT
-{
-    return ymd + dy;
+inline year_month_day operator+(const years& dy, const year_month_day& ymd) NOEXCEPT {
+  return ymd + dy;
 }
 
 CONSTCD11
-inline
-year_month_day
-operator-(const year_month_day& ymd, const years& dy) NOEXCEPT
-{
-    return ymd + (-dy);
+inline year_month_day operator-(const year_month_day& ymd, const years& dy) NOEXCEPT {
+  return ymd + (-dy);
 }
 
 // year_month_weekday
 
 CONSTCD11
-inline
-year_month_weekday::year_month_weekday(const date::year& y, const date::month& m,
-                                       const date::weekday_indexed& wdi)
-        NOEXCEPT
-    : y_(y)
-    , m_(m)
-    , wdi_(wdi)
-    {}
+inline year_month_weekday::year_month_weekday(const date::year& y, const date::month& m,
+                                              const date::weekday_indexed& wdi) NOEXCEPT
+    : y_(y),
+      m_(m),
+      wdi_(wdi) {}
 
 CONSTCD14
-inline
-year_month_weekday::year_month_weekday(const sys_days& dp) NOEXCEPT
-    : year_month_weekday(from_days(dp.time_since_epoch()))
-    {}
+inline year_month_weekday::year_month_weekday(const sys_days& dp) NOEXCEPT
+    : year_month_weekday(from_days(dp.time_since_epoch())) {}
 
 CONSTCD14
-inline
-year_month_weekday::year_month_weekday(const local_days& dp) NOEXCEPT
-    : year_month_weekday(from_days(dp.time_since_epoch()))
-    {}
+inline year_month_weekday::year_month_weekday(const local_days& dp) NOEXCEPT
+    : year_month_weekday(from_days(dp.time_since_epoch())) {}
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday&
-year_month_weekday::operator+=(const months& m) NOEXCEPT
-{
-    *this = *this + m;
-    return *this;
+template <class>
+CONSTCD14 inline year_month_weekday& year_month_weekday::operator+=(
+    const months& m) NOEXCEPT {
+  *this = *this + m;
+  return *this;
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday&
-year_month_weekday::operator-=(const months& m) NOEXCEPT
-{
-    *this = *this - m;
-    return *this;
+template <class>
+CONSTCD14 inline year_month_weekday& year_month_weekday::operator-=(
+    const months& m) NOEXCEPT {
+  *this = *this - m;
+  return *this;
 }
 
 CONSTCD14
-inline
-year_month_weekday&
-year_month_weekday::operator+=(const years& y) NOEXCEPT
-{
-    *this = *this + y;
-    return *this;
+inline year_month_weekday& year_month_weekday::operator+=(const years& y) NOEXCEPT {
+  *this = *this + y;
+  return *this;
 }
 
 CONSTCD14
-inline
-year_month_weekday&
-year_month_weekday::operator-=(const years& y) NOEXCEPT
-{
-    *this = *this - y;
-    return *this;
+inline year_month_weekday& year_month_weekday::operator-=(const years& y) NOEXCEPT {
+  *this = *this - y;
+  return *this;
 }
 
-CONSTCD11 inline year year_month_weekday::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month_weekday::month() const NOEXCEPT {return m_;}
+CONSTCD11 inline year year_month_weekday::year() const NOEXCEPT { return y_; }
+CONSTCD11 inline month year_month_weekday::month() const NOEXCEPT { return m_; }
 
 CONSTCD11
-inline
-weekday
-year_month_weekday::weekday() const NOEXCEPT
-{
-    return wdi_.weekday();
-}
+inline weekday year_month_weekday::weekday() const NOEXCEPT { return wdi_.weekday(); }
 
 CONSTCD11
-inline
-unsigned
-year_month_weekday::index() const NOEXCEPT
-{
-    return wdi_.index();
-}
+inline unsigned year_month_weekday::index() const NOEXCEPT { return wdi_.index(); }
 
 CONSTCD11
-inline
-weekday_indexed
-year_month_weekday::weekday_indexed() const NOEXCEPT
-{
-    return wdi_;
+inline weekday_indexed year_month_weekday::weekday_indexed() const NOEXCEPT {
+  return wdi_;
 }
 
 CONSTCD14
-inline
-year_month_weekday::operator sys_days() const NOEXCEPT
-{
-    return sys_days{to_days()};
+inline year_month_weekday::operator sys_days() const NOEXCEPT {
+  return sys_days{to_days()};
 }
 
 CONSTCD14
-inline
-year_month_weekday::operator local_days() const NOEXCEPT
-{
-    return local_days{to_days()};
+inline year_month_weekday::operator local_days() const NOEXCEPT {
+  return local_days{to_days()};
 }
 
 CONSTCD14
-inline
-bool
-year_month_weekday::ok() const NOEXCEPT
-{
-    if (!y_.ok() || !m_.ok() || !wdi_.weekday().ok() || wdi_.index() < 1)
-        return false;
-    if (wdi_.index() <= 4)
-        return true;
-    auto d2 = wdi_.weekday() - date::weekday(static_cast<sys_days>(y_/m_/1)) +
-                  days((wdi_.index()-1)*7 + 1);
-    return static_cast<unsigned>(d2.count()) <= static_cast<unsigned>((y_/m_/last).day());
+inline bool year_month_weekday::ok() const NOEXCEPT {
+  if (!y_.ok() || !m_.ok() || !wdi_.weekday().ok() || wdi_.index() < 1) return false;
+  if (wdi_.index() <= 4) return true;
+  auto d2 = wdi_.weekday() - date::weekday(static_cast<sys_days>(y_ / m_ / 1)) +
+            days((wdi_.index() - 1) * 7 + 1);
+  return static_cast<unsigned>(d2.count()) <=
+         static_cast<unsigned>((y_ / m_ / last).day());
 }
 
 CONSTCD14
-inline
-year_month_weekday
-year_month_weekday::from_days(days d) NOEXCEPT
-{
-    sys_days dp{d};
-    auto const wd = date::weekday(dp);
-    auto const ymd = year_month_day(dp);
-    return {ymd.year(), ymd.month(), wd[(static_cast<unsigned>(ymd.day())-1)/7+1]};
+inline year_month_weekday year_month_weekday::from_days(days d) NOEXCEPT {
+  sys_days dp{d};
+  auto const wd = date::weekday(dp);
+  auto const ymd = year_month_day(dp);
+  return {ymd.year(), ymd.month(), wd[(static_cast<unsigned>(ymd.day()) - 1) / 7 + 1]};
 }
 
 CONSTCD14
-inline
-days
-year_month_weekday::to_days() const NOEXCEPT
-{
-    auto d = sys_days(y_/m_/1);
-    return (d + (wdi_.weekday() - date::weekday(d) + days{(wdi_.index()-1)*7})
-           ).time_since_epoch();
+inline days year_month_weekday::to_days() const NOEXCEPT {
+  auto d = sys_days(y_ / m_ / 1);
+  return (d + (wdi_.weekday() - date::weekday(d) + days{(wdi_.index() - 1) * 7}))
+      .time_since_epoch();
 }
 
 CONSTCD11
-inline
-bool
-operator==(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT
-{
-    return x.year() == y.year() && x.month() == y.month() &&
-           x.weekday_indexed() == y.weekday_indexed();
+inline bool operator==(const year_month_weekday& x,
+                       const year_month_weekday& y) NOEXCEPT {
+  return x.year() == y.year() && x.month() == y.month() &&
+         x.weekday_indexed() == y.weekday_indexed();
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const year_month_weekday& x, const year_month_weekday& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const year_month_weekday& x,
+                       const year_month_weekday& y) NOEXCEPT {
+  return !(x == y);
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday& ymwdi)
-{
-    return os << ymwdi.year() << '/' << ymwdi.month()
-              << '/' << ymwdi.weekday_indexed();
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const year_month_weekday& ymwdi) {
+  return os << ymwdi.year() << '/' << ymwdi.month() << '/' << ymwdi.weekday_indexed();
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday
-operator+(const year_month_weekday& ymwd, const months& dm) NOEXCEPT
-{
-    return (ymwd.year() / ymwd.month() + dm) / ymwd.weekday_indexed();
+template <class>
+CONSTCD14 inline year_month_weekday operator+(const year_month_weekday& ymwd,
+                                              const months& dm) NOEXCEPT {
+  return (ymwd.year() / ymwd.month() + dm) / ymwd.weekday_indexed();
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday
-operator+(const months& dm, const year_month_weekday& ymwd) NOEXCEPT
-{
-    return ymwd + dm;
+template <class>
+CONSTCD14 inline year_month_weekday operator+(const months& dm,
+                                              const year_month_weekday& ymwd) NOEXCEPT {
+  return ymwd + dm;
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday
-operator-(const year_month_weekday& ymwd, const months& dm) NOEXCEPT
-{
-    return ymwd + (-dm);
+template <class>
+CONSTCD14 inline year_month_weekday operator-(const year_month_weekday& ymwd,
+                                              const months& dm) NOEXCEPT {
+  return ymwd + (-dm);
 }
 
 CONSTCD11
-inline
-year_month_weekday
-operator+(const year_month_weekday& ymwd, const years& dy) NOEXCEPT
-{
-    return {ymwd.year()+dy, ymwd.month(), ymwd.weekday_indexed()};
+inline year_month_weekday operator+(const year_month_weekday& ymwd,
+                                    const years& dy) NOEXCEPT {
+  return {ymwd.year() + dy, ymwd.month(), ymwd.weekday_indexed()};
 }
 
 CONSTCD11
-inline
-year_month_weekday
-operator+(const years& dy, const year_month_weekday& ymwd) NOEXCEPT
-{
-    return ymwd + dy;
+inline year_month_weekday operator+(const years& dy,
+                                    const year_month_weekday& ymwd) NOEXCEPT {
+  return ymwd + dy;
 }
 
 CONSTCD11
-inline
-year_month_weekday
-operator-(const year_month_weekday& ymwd, const years& dy) NOEXCEPT
-{
-    return ymwd + (-dy);
+inline year_month_weekday operator-(const year_month_weekday& ymwd,
+                                    const years& dy) NOEXCEPT {
+  return ymwd + (-dy);
 }
 
 // year_month_weekday_last
 
 CONSTCD11
-inline
-year_month_weekday_last::year_month_weekday_last(const date::year& y,
-                                                 const date::month& m,
-                                                 const date::weekday_last& wdl) NOEXCEPT
-    : y_(y)
-    , m_(m)
-    , wdl_(wdl)
-    {}
+inline year_month_weekday_last::year_month_weekday_last(
+    const date::year& y, const date::month& m, const date::weekday_last& wdl) NOEXCEPT
+    : y_(y),
+      m_(m),
+      wdl_(wdl) {}
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last&
-year_month_weekday_last::operator+=(const months& m) NOEXCEPT
-{
-    *this = *this + m;
-    return *this;
+template <class>
+CONSTCD14 inline year_month_weekday_last& year_month_weekday_last::operator+=(
+    const months& m) NOEXCEPT {
+  *this = *this + m;
+  return *this;
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last&
-year_month_weekday_last::operator-=(const months& m) NOEXCEPT
-{
-    *this = *this - m;
-    return *this;
+template <class>
+CONSTCD14 inline year_month_weekday_last& year_month_weekday_last::operator-=(
+    const months& m) NOEXCEPT {
+  *this = *this - m;
+  return *this;
 }
 
 CONSTCD14
-inline
-year_month_weekday_last&
-year_month_weekday_last::operator+=(const years& y) NOEXCEPT
-{
-    *this = *this + y;
-    return *this;
+inline year_month_weekday_last& year_month_weekday_last::operator+=(
+    const years& y) NOEXCEPT {
+  *this = *this + y;
+  return *this;
 }
 
 CONSTCD14
-inline
-year_month_weekday_last&
-year_month_weekday_last::operator-=(const years& y) NOEXCEPT
-{
-    *this = *this - y;
-    return *this;
+inline year_month_weekday_last& year_month_weekday_last::operator-=(
+    const years& y) NOEXCEPT {
+  *this = *this - y;
+  return *this;
 }
 
-CONSTCD11 inline year year_month_weekday_last::year() const NOEXCEPT {return y_;}
-CONSTCD11 inline month year_month_weekday_last::month() const NOEXCEPT {return m_;}
+CONSTCD11 inline year year_month_weekday_last::year() const NOEXCEPT { return y_; }
+CONSTCD11 inline month year_month_weekday_last::month() const NOEXCEPT { return m_; }
 
 CONSTCD11
-inline
-weekday
-year_month_weekday_last::weekday() const NOEXCEPT
-{
-    return wdl_.weekday();
+inline weekday year_month_weekday_last::weekday() const NOEXCEPT {
+  return wdl_.weekday();
 }
 
 CONSTCD11
-inline
-weekday_last
-year_month_weekday_last::weekday_last() const NOEXCEPT
-{
-    return wdl_;
+inline weekday_last year_month_weekday_last::weekday_last() const NOEXCEPT {
+  return wdl_;
 }
 
 CONSTCD14
-inline
-year_month_weekday_last::operator sys_days() const NOEXCEPT
-{
-    return sys_days{to_days()};
+inline year_month_weekday_last::operator sys_days() const NOEXCEPT {
+  return sys_days{to_days()};
 }
 
 CONSTCD14
-inline
-year_month_weekday_last::operator local_days() const NOEXCEPT
-{
-    return local_days{to_days()};
+inline year_month_weekday_last::operator local_days() const NOEXCEPT {
+  return local_days{to_days()};
 }
 
 CONSTCD11
-inline
-bool
-year_month_weekday_last::ok() const NOEXCEPT
-{
-    return y_.ok() && m_.ok() && wdl_.ok();
+inline bool year_month_weekday_last::ok() const NOEXCEPT {
+  return y_.ok() && m_.ok() && wdl_.ok();
 }
 
 CONSTCD14
-inline
-days
-year_month_weekday_last::to_days() const NOEXCEPT
-{
-    auto const d = sys_days(y_/m_/last);
-    return (d - (date::weekday{d} - wdl_.weekday())).time_since_epoch();
+inline days year_month_weekday_last::to_days() const NOEXCEPT {
+  auto const d = sys_days(y_ / m_ / last);
+  return (d - (date::weekday{d} - wdl_.weekday())).time_since_epoch();
 }
 
 CONSTCD11
-inline
-bool
-operator==(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT
-{
-    return x.year() == y.year() && x.month() == y.month() &&
-           x.weekday_last() == y.weekday_last();
+inline bool operator==(const year_month_weekday_last& x,
+                       const year_month_weekday_last& y) NOEXCEPT {
+  return x.year() == y.year() && x.month() == y.month() &&
+         x.weekday_last() == y.weekday_last();
 }
 
 CONSTCD11
-inline
-bool
-operator!=(const year_month_weekday_last& x, const year_month_weekday_last& y) NOEXCEPT
-{
-    return !(x == y);
+inline bool operator!=(const year_month_weekday_last& x,
+                       const year_month_weekday_last& y) NOEXCEPT {
+  return !(x == y);
 }
 
-template<class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const year_month_weekday_last& ymwdl)
-{
-    return os << ymwdl.year() << '/' << ymwdl.month() << '/' << ymwdl.weekday_last();
+template <class CharT, class Traits>
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const year_month_weekday_last& ymwdl) {
+  return os << ymwdl.year() << '/' << ymwdl.month() << '/' << ymwdl.weekday_last();
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last
-operator+(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT
-{
-    return (ymwdl.year() / ymwdl.month() + dm) / ymwdl.weekday_last();
+template <class>
+CONSTCD14 inline year_month_weekday_last operator+(const year_month_weekday_last& ymwdl,
+                                                   const months& dm) NOEXCEPT {
+  return (ymwdl.year() / ymwdl.month() + dm) / ymwdl.weekday_last();
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last
-operator+(const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT
-{
-    return ymwdl + dm;
+template <class>
+CONSTCD14 inline year_month_weekday_last operator+(
+    const months& dm, const year_month_weekday_last& ymwdl) NOEXCEPT {
+  return ymwdl + dm;
 }
 
-template<class>
-CONSTCD14
-inline
-year_month_weekday_last
-operator-(const year_month_weekday_last& ymwdl, const months& dm) NOEXCEPT
-{
-    return ymwdl + (-dm);
+template <class>
+CONSTCD14 inline year_month_weekday_last operator-(const year_month_weekday_last& ymwdl,
+                                                   const months& dm) NOEXCEPT {
+  return ymwdl + (-dm);
 }
 
 CONSTCD11
-inline
-year_month_weekday_last
-operator+(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT
-{
-    return {ymwdl.year()+dy, ymwdl.month(), ymwdl.weekday_last()};
+inline year_month_weekday_last operator+(const year_month_weekday_last& ymwdl,
+                                         const years& dy) NOEXCEPT {
+  return {ymwdl.year() + dy, ymwdl.month(), ymwdl.weekday_last()};
 }
 
 CONSTCD11
-inline
-year_month_weekday_last
-operator+(const years& dy, const year_month_weekday_last& ymwdl) NOEXCEPT
-{
-    return ymwdl + dy;
+inline year_month_weekday_last operator+(const years& dy,
+                                         const year_month_weekday_last& ymwdl) NOEXCEPT {
+  return ymwdl + dy;
 }
 
 CONSTCD11
-inline
-year_month_weekday_last
-operator-(const year_month_weekday_last& ymwdl, const years& dy) NOEXCEPT
-{
-    return ymwdl + (-dy);
+inline year_month_weekday_last operator-(const year_month_weekday_last& ymwdl,
+                                         const years& dy) NOEXCEPT {
+  return ymwdl + (-dy);
 }
 
 // year_month from operator/()
 
 CONSTCD11
-inline
-year_month
-operator/(const year& y, const month& m) NOEXCEPT
-{
-    return {y, m};
-}
+inline year_month operator/(const year& y, const month& m) NOEXCEPT { return {y, m}; }
 
 CONSTCD11
-inline
-year_month
-operator/(const year& y, int   m) NOEXCEPT
-{
-    return y / month(static_cast<unsigned>(m));
+inline year_month operator/(const year& y, int m) NOEXCEPT {
+  return y / month(static_cast<unsigned>(m));
 }
 
 // month_day from operator/()
 
 CONSTCD11
-inline
-month_day
-operator/(const month& m, const day& d) NOEXCEPT
-{
-    return {m, d};
-}
+inline month_day operator/(const month& m, const day& d) NOEXCEPT { return {m, d}; }
 
 CONSTCD11
-inline
-month_day
-operator/(const day& d, const month& m) NOEXCEPT
-{
-    return m / d;
-}
+inline month_day operator/(const day& d, const month& m) NOEXCEPT { return m / d; }
 
 CONSTCD11
-inline
-month_day
-operator/(const month& m, int d) NOEXCEPT
-{
-    return m / day(static_cast<unsigned>(d));
+inline month_day operator/(const month& m, int d) NOEXCEPT {
+  return m / day(static_cast<unsigned>(d));
 }
 
 CONSTCD11
-inline
-month_day
-operator/(int m, const day& d) NOEXCEPT
-{
-    return month(static_cast<unsigned>(m)) / d;
+inline month_day operator/(int m, const day& d) NOEXCEPT {
+  return month(static_cast<unsigned>(m)) / d;
 }
 
-CONSTCD11 inline month_day operator/(const day& d, int m) NOEXCEPT {return m / d;}
+CONSTCD11 inline month_day operator/(const day& d, int m) NOEXCEPT { return m / d; }
 
 // month_day_last from operator/()
 
 CONSTCD11
-inline
-month_day_last
-operator/(const month& m, last_spec) NOEXCEPT
-{
-    return month_day_last{m};
+inline month_day_last operator/(const month& m, last_spec) NOEXCEPT {
+  return month_day_last{m};
 }
 
 CONSTCD11
-inline
-month_day_last
-operator/(last_spec, const month& m) NOEXCEPT
-{
-    return m/last;
-}
+inline month_day_last operator/(last_spec, const month& m) NOEXCEPT { return m / last; }
 
 CONSTCD11
-inline
-month_day_last
-operator/(int m, last_spec) NOEXCEPT
-{
-    return month(static_cast<unsigned>(m))/last;
+inline month_day_last operator/(int m, last_spec) NOEXCEPT {
+  return month(static_cast<unsigned>(m)) / last;
 }
 
 CONSTCD11
-inline
-month_day_last
-operator/(last_spec, int m) NOEXCEPT
-{
-    return m/last;
-}
+inline month_day_last operator/(last_spec, int m) NOEXCEPT { return m / last; }
 
 // month_weekday from operator/()
 
 CONSTCD11
-inline
-month_weekday
-operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT
-{
-    return {m, wdi};
+inline month_weekday operator/(const month& m, const weekday_indexed& wdi) NOEXCEPT {
+  return {m, wdi};
 }
 
 CONSTCD11
-inline
-month_weekday
-operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT
-{
-    return m / wdi;
+inline month_weekday operator/(const weekday_indexed& wdi, const month& m) NOEXCEPT {
+  return m / wdi;
 }
 
 CONSTCD11
-inline
-month_weekday
-operator/(int m, const weekday_indexed& wdi) NOEXCEPT
-{
-    return month(static_cast<unsigned>(m)) / wdi;
+inline month_weekday operator/(int m, const weekday_indexed& wdi) NOEXCEPT {
+  return month(static_cast<unsigned>(m)) / wdi;
 }
 
 CONSTCD11
-inline
-month_weekday
-operator/(const weekday_indexed& wdi, int m) NOEXCEPT
-{
-    return m / wdi;
+inline month_weekday operator/(const weekday_indexed& wdi, int m) NOEXCEPT {
+  return m / wdi;
 }
 
 // month_weekday_last from operator/()
 
 CONSTCD11
-inline
-month_weekday_last
-operator/(const month& m, const weekday_last& wdl) NOEXCEPT
-{
-    return {m, wdl};
+inline month_weekday_last operator/(const month& m, const weekday_last& wdl) NOEXCEPT {
+  return {m, wdl};
 }
 
 CONSTCD11
-inline
-month_weekday_last
-operator/(const weekday_last& wdl, const month& m) NOEXCEPT
-{
-    return m / wdl;
+inline month_weekday_last operator/(const weekday_last& wdl, const month& m) NOEXCEPT {
+  return m / wdl;
 }
 
 CONSTCD11
-inline
-month_weekday_last
-operator/(int m, const weekday_last& wdl) NOEXCEPT
-{
-    return month(static_cast<unsigned>(m)) / wdl;
+inline month_weekday_last operator/(int m, const weekday_last& wdl) NOEXCEPT {
+  return month(static_cast<unsigned>(m)) / wdl;
 }
 
 CONSTCD11
-inline
-month_weekday_last
-operator/(const weekday_last& wdl, int m) NOEXCEPT
-{
-    return m / wdl;
+inline month_weekday_last operator/(const weekday_last& wdl, int m) NOEXCEPT {
+  return m / wdl;
 }
 
 // year_month_day from operator/()
 
 CONSTCD11
-inline
-year_month_day
-operator/(const year_month& ym, const day& d) NOEXCEPT
-{
-    return {ym.year(), ym.month(), d};
+inline year_month_day operator/(const year_month& ym, const day& d) NOEXCEPT {
+  return {ym.year(), ym.month(), d};
 }
 
 CONSTCD11
-inline
-year_month_day
-operator/(const year_month& ym, int d)  NOEXCEPT
-{
-    return ym / day(static_cast<unsigned>(d));
+inline year_month_day operator/(const year_month& ym, int d) NOEXCEPT {
+  return ym / day(static_cast<unsigned>(d));
 }
 
 CONSTCD11
-inline
-year_month_day
-operator/(const year& y, const month_day& md) NOEXCEPT
-{
-    return y / md.month() / md.day();
+inline year_month_day operator/(const year& y, const month_day& md) NOEXCEPT {
+  return y / md.month() / md.day();
 }
 
 CONSTCD11
-inline
-year_month_day
-operator/(int y, const month_day& md) NOEXCEPT
-{
-    return year(y) / md;
+inline year_month_day operator/(int y, const month_day& md) NOEXCEPT {
+  return year(y) / md;
 }
 
 CONSTCD11
-inline
-year_month_day
-operator/(const month_day& md, const year& y)  NOEXCEPT
-{
-    return y / md;
+inline year_month_day operator/(const month_day& md, const year& y) NOEXCEPT {
+  return y / md;
 }
 
 CONSTCD11
-inline
-year_month_day
-operator/(const month_day& md, int y) NOEXCEPT
-{
-    return year(y) / md;
+inline year_month_day operator/(const month_day& md, int y) NOEXCEPT {
+  return year(y) / md;
 }
 
 // year_month_day_last from operator/()
 
 CONSTCD11
-inline
-year_month_day_last
-operator/(const year_month& ym, last_spec) NOEXCEPT
-{
-    return {ym.year(), month_day_last{ym.month()}};
+inline year_month_day_last operator/(const year_month& ym, last_spec) NOEXCEPT {
+  return {ym.year(), month_day_last{ym.month()}};
 }
 
 CONSTCD11
-inline
-year_month_day_last
-operator/(const year& y, const month_day_last& mdl) NOEXCEPT
-{
-    return {y, mdl};
+inline year_month_day_last operator/(const year& y, const month_day_last& mdl) NOEXCEPT {
+  return {y, mdl};
 }
 
 CONSTCD11
-inline
-year_month_day_last
-operator/(int y, const month_day_last& mdl) NOEXCEPT
-{
-    return year(y) / mdl;
+inline year_month_day_last operator/(int y, const month_day_last& mdl) NOEXCEPT {
+  return year(y) / mdl;
 }
 
 CONSTCD11
-inline
-year_month_day_last
-operator/(const month_day_last& mdl, const year& y) NOEXCEPT
-{
-    return y / mdl;
+inline year_month_day_last operator/(const month_day_last& mdl, const year& y) NOEXCEPT {
+  return y / mdl;
 }
 
 CONSTCD11
-inline
-year_month_day_last
-operator/(const month_day_last& mdl, int y) NOEXCEPT
-{
-    return year(y) / mdl;
+inline year_month_day_last operator/(const month_day_last& mdl, int y) NOEXCEPT {
+  return year(y) / mdl;
 }
 
 // year_month_weekday from operator/()
 
 CONSTCD11
-inline
-year_month_weekday
-operator/(const year_month& ym, const weekday_indexed& wdi) NOEXCEPT
-{
-    return {ym.year(), ym.month(), wdi};
+inline year_month_weekday operator/(const year_month& ym,
+                                    const weekday_indexed& wdi) NOEXCEPT {
+  return {ym.year(), ym.month(), wdi};
 }
 
 CONSTCD11
-inline
-year_month_weekday
-operator/(const year& y, const month_weekday& mwd) NOEXCEPT
-{
-    return {y, mwd.month(), mwd.weekday_indexed()};
+inline year_month_weekday operator/(const year& y, const month_weekday& mwd) NOEXCEPT {
+  return {y, mwd.month(), mwd.weekday_indexed()};
 }
 
 CONSTCD11
-inline
-year_month_weekday
-operator/(int y, const month_weekday& mwd) NOEXCEPT
-{
-    return year(y) / mwd;
+inline year_month_weekday operator/(int y, const month_weekday& mwd) NOEXCEPT {
+  return year(y) / mwd;
 }
 
 CONSTCD11
-inline
-year_month_weekday
-operator/(const month_weekday& mwd, const year& y) NOEXCEPT
-{
-    return y / mwd;
+inline year_month_weekday operator/(const month_weekday& mwd, const year& y) NOEXCEPT {
+  return y / mwd;
 }
 
 CONSTCD11
-inline
-year_month_weekday
-operator/(const month_weekday& mwd, int y) NOEXCEPT
-{
-    return year(y) / mwd;
+inline year_month_weekday operator/(const month_weekday& mwd, int y) NOEXCEPT {
+  return year(y) / mwd;
 }
 
 // year_month_weekday_last from operator/()
 
 CONSTCD11
-inline
-year_month_weekday_last
-operator/(const year_month& ym, const weekday_last& wdl) NOEXCEPT
-{
-    return {ym.year(), ym.month(), wdl};
+inline year_month_weekday_last operator/(const year_month& ym,
+                                         const weekday_last& wdl) NOEXCEPT {
+  return {ym.year(), ym.month(), wdl};
 }
 
 CONSTCD11
-inline
-year_month_weekday_last
-operator/(const year& y, const month_weekday_last& mwdl) NOEXCEPT
-{
-    return {y, mwdl.month(), mwdl.weekday_last()};
+inline year_month_weekday_last operator/(const year& y,
+                                         const month_weekday_last& mwdl) NOEXCEPT {
+  return {y, mwdl.month(), mwdl.weekday_last()};
 }
 
 CONSTCD11
-inline
-year_month_weekday_last
-operator/(int y, const month_weekday_last& mwdl) NOEXCEPT
-{
-    return year(y) / mwdl;
+inline year_month_weekday_last operator/(int y, const month_weekday_last& mwdl) NOEXCEPT {
+  return year(y) / mwdl;
 }
 
 CONSTCD11
-inline
-year_month_weekday_last
-operator/(const month_weekday_last& mwdl, const year& y) NOEXCEPT
-{
-    return y / mwdl;
+inline year_month_weekday_last operator/(const month_weekday_last& mwdl,
+                                         const year& y) NOEXCEPT {
+  return y / mwdl;
 }
 
 CONSTCD11
-inline
-year_month_weekday_last
-operator/(const month_weekday_last& mwdl, int y) NOEXCEPT
-{
-    return year(y) / mwdl;
+inline year_month_weekday_last operator/(const month_weekday_last& mwdl, int y) NOEXCEPT {
+  return year(y) / mwdl;
 }
 
 template <class Duration>
 struct fields;
 
 template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
-          const fields<Duration>& fds, const std::string* abbrev = nullptr,
-          const std::chrono::seconds* offset_sec = nullptr);
+std::basic_ostream<CharT, Traits>& to_stream(
+    std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const fields<Duration>& fds,
+    const std::string* abbrev = nullptr,
+    const std::chrono::seconds* offset_sec = nullptr);
 
 template <class CharT, class Traits, class Duration, class Alloc>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            fields<Duration>& fds, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr);
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, fields<Duration>& fds,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr);
 
 // hh_mm_ss
 
-namespace detail
-{
+namespace detail {
 
-struct undocumented {explicit undocumented() = default;};
+struct undocumented {
+  explicit undocumented() = default;
+};
 
 // width<n>::value is the number of fractional decimal digits in 1/n
 // width<0>::value and width<1>::value are defined to be 0
@@ -3702,858 +2863,583 @@ struct undocumented {explicit undocumented() = default;};
 // Example:  width<1000>::value ==  3
 template <std::uint64_t n, std::uint64_t d = 10, unsigned w = 0,
           bool should_continue = !(n < 2) && d != 0 && (w < 19)>
-struct width
-{
-    static CONSTDATA unsigned value = 1 + width<n, d%n*10, w+1>::value;
+struct width {
+  static CONSTDATA unsigned value = 1 + width<n, d % n * 10, w + 1>::value;
 };
 
 template <std::uint64_t n, std::uint64_t d, unsigned w>
-struct width<n, d, w, false>
-{
-    static CONSTDATA unsigned value = 0;
+struct width<n, d, w, false> {
+  static CONSTDATA unsigned value = 0;
 };
 
 template <unsigned exp>
-struct static_pow10
-{
-private:
-    static CONSTDATA std::uint64_t h = static_pow10<exp/2>::value;
-public:
-    static CONSTDATA std::uint64_t value = h * h * (exp % 2 ? 10 : 1);
+struct static_pow10 {
+ private:
+  static CONSTDATA std::uint64_t h = static_pow10<exp / 2>::value;
+
+ public:
+  static CONSTDATA std::uint64_t value = h * h * (exp % 2 ? 10 : 1);
 };
 
 template <>
-struct static_pow10<0>
-{
-    static CONSTDATA std::uint64_t value = 1;
+struct static_pow10<0> {
+  static CONSTDATA std::uint64_t value = 1;
 };
 
 template <class Duration>
-class decimal_format_seconds
-{
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-    using rep = typename CT::rep;
-public:
-    static unsigned constexpr width = detail::width<CT::period::den>::value < 19 ?
-                                      detail::width<CT::period::den>::value : 6u;
-    using precision = std::chrono::duration<rep,
-                                            std::ratio<1, static_pow10<width>::value>>;
-
-private:
-    std::chrono::seconds s_;
-    precision            sub_s_;
-
-public:
-    CONSTCD11 decimal_format_seconds()
-        : s_()
-        , sub_s_()
-        {}
-
-    CONSTCD11 explicit decimal_format_seconds(const Duration& d) NOEXCEPT
-        : s_(std::chrono::duration_cast<std::chrono::seconds>(d))
-        , sub_s_(std::chrono::treat_as_floating_point<rep>::value ? d - s_ :
-                     std::chrono::duration_cast<precision>(d - s_))
-        {}
-
-    CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT {return s_;}
-    CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_;}
-    CONSTCD11 precision subseconds() const NOEXCEPT {return sub_s_;}
-
-    CONSTCD14 precision to_duration() const NOEXCEPT
-    {
-        return s_ + sub_s_;
-    }
-
-    CONSTCD11 bool in_conventional_range() const NOEXCEPT
-    {
-        return sub_s_ < std::chrono::seconds{1} && s_ < std::chrono::minutes{1};
-    }
-
-    template <class CharT, class Traits>
-    friend
-    std::basic_ostream<CharT, Traits>&
-    operator<<(std::basic_ostream<CharT, Traits>& os, const decimal_format_seconds& x)
-    {
-        return x.print(os, std::chrono::treat_as_floating_point<rep>{});
-    }
-
-    template <class CharT, class Traits>
-    std::basic_ostream<CharT, Traits>&
-    print(std::basic_ostream<CharT, Traits>& os, std::true_type) const
-    {
-        date::detail::save_ostream<CharT, Traits> _(os);
-        std::chrono::duration<rep> d = s_ + sub_s_;
-        if (d < std::chrono::seconds{10})
-            os << '0';
-        os << std::fixed << d.count();
-        return os;
-    }
+class decimal_format_seconds {
+  using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+  using rep = typename CT::rep;
+
+ public:
+  static unsigned constexpr width = detail::width<CT::period::den>::value < 19
+                                        ? detail::width<CT::period::den>::value
+                                        : 6u;
+  using precision = std::chrono::duration<rep, std::ratio<1, static_pow10<width>::value>>;
+
+ private:
+  std::chrono::seconds s_;
+  precision sub_s_;
+
+ public:
+  CONSTCD11 decimal_format_seconds() : s_(), sub_s_() {}
+
+  CONSTCD11 explicit decimal_format_seconds(const Duration& d) NOEXCEPT
+      : s_(std::chrono::duration_cast<std::chrono::seconds>(d)),
+        sub_s_(std::chrono::treat_as_floating_point<rep>::value
+                   ? d - s_
+                   : std::chrono::duration_cast<precision>(d - s_)) {}
+
+  CONSTCD14 std::chrono::seconds& seconds() NOEXCEPT { return s_; }
+  CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT { return s_; }
+  CONSTCD11 precision subseconds() const NOEXCEPT { return sub_s_; }
+
+  CONSTCD14 precision to_duration() const NOEXCEPT { return s_ + sub_s_; }
+
+  CONSTCD11 bool in_conventional_range() const NOEXCEPT {
+    return sub_s_ < std::chrono::seconds{1} && s_ < std::chrono::minutes{1};
+  }
+
+  template <class CharT, class Traits>
+  friend std::basic_ostream<CharT, Traits>& operator<<(
+      std::basic_ostream<CharT, Traits>& os, const decimal_format_seconds& x) {
+    return x.print(os, std::chrono::treat_as_floating_point<rep>{});
+  }
+
+  template <class CharT, class Traits>
+  std::basic_ostream<CharT, Traits>& print(std::basic_ostream<CharT, Traits>& os,
+                                           std::true_type) const {
+    date::detail::save_ostream<CharT, Traits> _(os);
+    std::chrono::duration<rep> d = s_ + sub_s_;
+    if (d < std::chrono::seconds{10}) os << '0';
+    os << std::fixed << d.count();
+    return os;
+  }
 
-    template <class CharT, class Traits>
-    std::basic_ostream<CharT, Traits>&
-    print(std::basic_ostream<CharT, Traits>& os, std::false_type) const
-    {
-        date::detail::save_ostream<CharT, Traits> _(os);
-        os.fill('0');
-        os.flags(std::ios::dec | std::ios::right);
-        os.width(2);
-        os << s_.count();
-        if (width > 0)
-        {
+  template <class CharT, class Traits>
+  std::basic_ostream<CharT, Traits>& print(std::basic_ostream<CharT, Traits>& os,
+                                           std::false_type) const {
+    date::detail::save_ostream<CharT, Traits> _(os);
+    os.fill('0');
+    os.flags(std::ios::dec | std::ios::right);
+    os.width(2);
+    os << s_.count();
+    if (width > 0) {
 #if !ONLY_C_LOCALE
-            os << std::use_facet<std::numpunct<CharT>>(os.getloc()).decimal_point();
+      os << std::use_facet<std::numpunct<CharT>>(os.getloc()).decimal_point();
 #else
-            os << '.';
+      os << '.';
 #endif
-            os.width(width);
-            os << sub_s_.count();
-        }
-        return os;
+      os.width(width);
+      os << sub_s_.count();
     }
+    return os;
+  }
 };
 
 template <class Rep, class Period>
-inline
-CONSTCD11
-typename std::enable_if
-         <
-            std::numeric_limits<Rep>::is_signed,
-            std::chrono::duration<Rep, Period>
-         >::type
-abs(std::chrono::duration<Rep, Period> d)
-{
-    return d >= d.zero() ? +d : -d;
+inline CONSTCD11 typename std::enable_if<std::numeric_limits<Rep>::is_signed,
+                                         std::chrono::duration<Rep, Period>>::type
+abs(std::chrono::duration<Rep, Period> d) {
+  return d >= d.zero() ? +d : -d;
 }
 
 template <class Rep, class Period>
-inline
-CONSTCD11
-typename std::enable_if
-         <
-            !std::numeric_limits<Rep>::is_signed,
-            std::chrono::duration<Rep, Period>
-         >::type
-abs(std::chrono::duration<Rep, Period> d)
-{
-    return d;
+inline CONSTCD11 typename std::enable_if<!std::numeric_limits<Rep>::is_signed,
+                                         std::chrono::duration<Rep, Period>>::type
+abs(std::chrono::duration<Rep, Period> d) {
+  return d;
 }
 
 }  // namespace detail
 
 template <class Duration>
-class hh_mm_ss
-{
-    using dfs = detail::decimal_format_seconds<typename std::common_type<Duration,
-                                               std::chrono::seconds>::type>;
-
-    std::chrono::hours h_;
-    std::chrono::minutes m_;
-    dfs s_;
-    bool neg_;
-
-public:
-    static unsigned CONSTDATA fractional_width = dfs::width;
-    using precision = typename dfs::precision;
-
-    CONSTCD11 hh_mm_ss() NOEXCEPT
-        : hh_mm_ss(Duration::zero())
-        {}
-
-    CONSTCD11 explicit hh_mm_ss(Duration d) NOEXCEPT
-        : h_(std::chrono::duration_cast<std::chrono::hours>(detail::abs(d)))
-        , m_(std::chrono::duration_cast<std::chrono::minutes>(detail::abs(d)) - h_)
-        , s_(detail::abs(d) - h_ - m_)
-        , neg_(d < Duration::zero())
-        {}
-
-    CONSTCD11 std::chrono::hours hours() const NOEXCEPT {return h_;}
-    CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT {return m_;}
-    CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT {return s_.seconds();}
-    CONSTCD14 std::chrono::seconds&
-        seconds(detail::undocumented) NOEXCEPT {return s_.seconds();}
-    CONSTCD11 precision subseconds() const NOEXCEPT {return s_.subseconds();}
-    CONSTCD11 bool is_negative() const NOEXCEPT {return neg_;}
-
-    CONSTCD11 explicit operator  precision()   const NOEXCEPT {return to_duration();}
-    CONSTCD11          precision to_duration() const NOEXCEPT
-        {return (h_ + m_ + s_.to_duration()) * (1-2*neg_);}
-
-    CONSTCD11 bool in_conventional_range() const NOEXCEPT
-    {
-        return !neg_ && h_ < days{1} && m_ < std::chrono::hours{1} &&
-               s_.in_conventional_range();
-    }
-
-private:
-
-    template <class charT, class traits>
-    friend
-    std::basic_ostream<charT, traits>&
-    operator<<(std::basic_ostream<charT, traits>& os, hh_mm_ss const& tod)
-    {
-        if (tod.is_negative())
-            os << '-';
-        if (tod.h_ < std::chrono::hours{10})
-            os << '0';
-        os << tod.h_.count() << ':';
-        if (tod.m_ < std::chrono::minutes{10})
-            os << '0';
-        os << tod.m_.count() << ':' << tod.s_;
-        return os;
-    }
-
-    template <class CharT, class Traits, class Duration2>
-    friend
-    std::basic_ostream<CharT, Traits>&
-    date::to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
-          const fields<Duration2>& fds, const std::string* abbrev,
-          const std::chrono::seconds* offset_sec);
-
-    template <class CharT, class Traits, class Duration2, class Alloc>
-    friend
-    std::basic_istream<CharT, Traits>&
-    date::from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-          fields<Duration2>& fds,
-          std::basic_string<CharT, Traits, Alloc>* abbrev, std::chrono::minutes* offset);
+class hh_mm_ss {
+  using dfs = detail::decimal_format_seconds<
+      typename std::common_type<Duration, std::chrono::seconds>::type>;
+
+  std::chrono::hours h_;
+  std::chrono::minutes m_;
+  dfs s_;
+  bool neg_;
+
+ public:
+  static unsigned CONSTDATA fractional_width = dfs::width;
+  using precision = typename dfs::precision;
+
+  CONSTCD11 hh_mm_ss() NOEXCEPT : hh_mm_ss(Duration::zero()) {}
+
+  CONSTCD11 explicit hh_mm_ss(Duration d) NOEXCEPT
+      : h_(std::chrono::duration_cast<std::chrono::hours>(detail::abs(d))),
+        m_(std::chrono::duration_cast<std::chrono::minutes>(detail::abs(d)) - h_),
+        s_(detail::abs(d) - h_ - m_),
+        neg_(d < Duration::zero()) {}
+
+  CONSTCD11 std::chrono::hours hours() const NOEXCEPT { return h_; }
+  CONSTCD11 std::chrono::minutes minutes() const NOEXCEPT { return m_; }
+  CONSTCD11 std::chrono::seconds seconds() const NOEXCEPT { return s_.seconds(); }
+  CONSTCD14 std::chrono::seconds& seconds(detail::undocumented) NOEXCEPT {
+    return s_.seconds();
+  }
+  CONSTCD11 precision subseconds() const NOEXCEPT { return s_.subseconds(); }
+  CONSTCD11 bool is_negative() const NOEXCEPT { return neg_; }
+
+  CONSTCD11 explicit operator precision() const NOEXCEPT { return to_duration(); }
+  CONSTCD11 precision to_duration() const NOEXCEPT {
+    return (h_ + m_ + s_.to_duration()) * (1 - 2 * neg_);
+  }
+
+  CONSTCD11 bool in_conventional_range() const NOEXCEPT {
+    return !neg_ && h_ < days{1} && m_ < std::chrono::hours{1} &&
+           s_.in_conventional_range();
+  }
+
+ private:
+  template <class charT, class traits>
+  friend std::basic_ostream<charT, traits>& operator<<(
+      std::basic_ostream<charT, traits>& os, hh_mm_ss const& tod) {
+    if (tod.is_negative()) os << '-';
+    if (tod.h_ < std::chrono::hours{10}) os << '0';
+    os << tod.h_.count() << ':';
+    if (tod.m_ < std::chrono::minutes{10}) os << '0';
+    os << tod.m_.count() << ':' << tod.s_;
+    return os;
+  }
+
+  template <class CharT, class Traits, class Duration2>
+  friend std::basic_ostream<CharT, Traits>& date::to_stream(
+      std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+      const fields<Duration2>& fds, const std::string* abbrev,
+      const std::chrono::seconds* offset_sec);
+
+  template <class CharT, class Traits, class Duration2, class Alloc>
+  friend std::basic_istream<CharT, Traits>& date::from_stream(
+      std::basic_istream<CharT, Traits>& is, const CharT* fmt, fields<Duration2>& fds,
+      std::basic_string<CharT, Traits, Alloc>* abbrev, std::chrono::minutes* offset);
 };
 
-inline
-CONSTCD14
-bool
-is_am(std::chrono::hours const& h) NOEXCEPT
-{
-    using std::chrono::hours;
-    return hours{0} <= h && h < hours{12};
+inline CONSTCD14 bool is_am(std::chrono::hours const& h) NOEXCEPT {
+  using std::chrono::hours;
+  return hours{0} <= h && h < hours{12};
 }
 
-inline
-CONSTCD14
-bool
-is_pm(std::chrono::hours const& h) NOEXCEPT
-{
-    using std::chrono::hours;
-    return hours{12} <= h && h < hours{24};
+inline CONSTCD14 bool is_pm(std::chrono::hours const& h) NOEXCEPT {
+  using std::chrono::hours;
+  return hours{12} <= h && h < hours{24};
 }
 
-inline
-CONSTCD14
-std::chrono::hours
-make12(std::chrono::hours h) NOEXCEPT
-{
-    using std::chrono::hours;
-    if (h < hours{12})
-    {
-        if (h == hours{0})
-            h = hours{12};
-    }
-    else
-    {
-        if (h != hours{12})
-            h -= hours{12};
-    }
-    return h;
+inline CONSTCD14 std::chrono::hours make12(std::chrono::hours h) NOEXCEPT {
+  using std::chrono::hours;
+  if (h < hours{12}) {
+    if (h == hours{0}) h = hours{12};
+  } else {
+    if (h != hours{12}) h -= hours{12};
+  }
+  return h;
 }
 
-inline
-CONSTCD14
-std::chrono::hours
-make24(std::chrono::hours h, bool is_pm) NOEXCEPT
-{
-    using std::chrono::hours;
-    if (is_pm)
-    {
-        if (h != hours{12})
-            h += hours{12};
-    }
-    else if (h == hours{12})
-        h = hours{0};
-    return h;
+inline CONSTCD14 std::chrono::hours make24(std::chrono::hours h, bool is_pm) NOEXCEPT {
+  using std::chrono::hours;
+  if (is_pm) {
+    if (h != hours{12}) h += hours{12};
+  } else if (h == hours{12})
+    h = hours{0};
+  return h;
 }
 
 template <class Duration>
 using time_of_day = hh_mm_ss<Duration>;
 
 template <class Rep, class Period,
-          class = typename std::enable_if
-              <!std::chrono::treat_as_floating_point<Rep>::value>::type>
-CONSTCD11
-inline
-hh_mm_ss<std::chrono::duration<Rep, Period>>
-make_time(const std::chrono::duration<Rep, Period>& d)
-{
-    return hh_mm_ss<std::chrono::duration<Rep, Period>>(d);
+          class = typename std::enable_if<
+              !std::chrono::treat_as_floating_point<Rep>::value>::type>
+CONSTCD11 inline hh_mm_ss<std::chrono::duration<Rep, Period>> make_time(
+    const std::chrono::duration<Rep, Period>& d) {
+  return hh_mm_ss<std::chrono::duration<Rep, Period>>(d);
 }
 
 template <class CharT, class Traits, class Duration>
-inline
-typename std::enable_if
-<
+inline typename std::enable_if<
     !std::chrono::treat_as_floating_point<typename Duration::rep>::value &&
-        std::ratio_less<typename Duration::period, days::period>::value
-    , std::basic_ostream<CharT, Traits>&
->::type
-operator<<(std::basic_ostream<CharT, Traits>& os, const sys_time<Duration>& tp)
-{
-    auto const dp = date::floor<days>(tp);
-    return os << year_month_day(dp) << ' ' << make_time(tp-dp);
+        std::ratio_less<typename Duration::period, days::period>::value,
+    std::basic_ostream<CharT, Traits>&>::type
+operator<<(std::basic_ostream<CharT, Traits>& os, const sys_time<Duration>& tp) {
+  auto const dp = date::floor<days>(tp);
+  return os << year_month_day(dp) << ' ' << make_time(tp - dp);
 }
 
 template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const sys_days& dp)
-{
-    return os << year_month_day(dp);
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const sys_days& dp) {
+  return os << year_month_day(dp);
 }
 
 template <class CharT, class Traits, class Duration>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os, const local_time<Duration>& ut)
-{
-    return (os << sys_time<Duration>{ut.time_since_epoch()});
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const local_time<Duration>& ut) {
+  return (os << sys_time<Duration>{ut.time_since_epoch()});
 }
 
-namespace detail
-{
+namespace detail {
 
 template <class CharT, std::size_t N>
 class string_literal;
 
 template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
-inline
-CONSTCD14
-string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
-               N1 + N2 - 1>
-operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT;
+inline CONSTCD14 string_literal<
+    typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
+    N1 + N2 - 1>
+operator+(const string_literal<CharT1, N1>& x,
+          const string_literal<CharT2, N2>& y) NOEXCEPT;
 
 template <class CharT, std::size_t N>
-class string_literal
-{
-    CharT p_[N];
+class string_literal {
+  CharT p_[N];
 
-    CONSTCD11 string_literal() NOEXCEPT
-      : p_{}
-    {}
+  CONSTCD11 string_literal() NOEXCEPT : p_{} {}
 
-public:
-    using const_iterator = const CharT*;
+ public:
+  using const_iterator = const CharT*;
 
-    string_literal(string_literal const&) = default;
-    string_literal& operator=(string_literal const&) = delete;
+  string_literal(string_literal const&) = default;
+  string_literal& operator=(string_literal const&) = delete;
 
-    template <std::size_t N1 = 2,
-              class = typename std::enable_if<N1 == N>::type>
-    CONSTCD11 string_literal(CharT c) NOEXCEPT
-        : p_{c}
-    {
-    }
+  template <std::size_t N1 = 2, class = typename std::enable_if<N1 == N>::type>
+  CONSTCD11 string_literal(CharT c) NOEXCEPT : p_{c} {}
 
-    template <std::size_t N1 = 3,
-              class = typename std::enable_if<N1 == N>::type>
-    CONSTCD11 string_literal(CharT c1, CharT c2) NOEXCEPT
-        : p_{c1, c2}
-    {
-    }
+  template <std::size_t N1 = 3, class = typename std::enable_if<N1 == N>::type>
+  CONSTCD11 string_literal(CharT c1, CharT c2) NOEXCEPT : p_{c1, c2} {}
 
-    template <std::size_t N1 = 4,
-              class = typename std::enable_if<N1 == N>::type>
-    CONSTCD11 string_literal(CharT c1, CharT c2, CharT c3) NOEXCEPT
-        : p_{c1, c2, c3}
-    {
-    }
+  template <std::size_t N1 = 4, class = typename std::enable_if<N1 == N>::type>
+  CONSTCD11 string_literal(CharT c1, CharT c2, CharT c3) NOEXCEPT : p_{c1, c2, c3} {}
 
-    CONSTCD14 string_literal(const CharT(&a)[N]) NOEXCEPT
-        : p_{}
-    {
-        for (std::size_t i = 0; i < N; ++i)
-            p_[i] = a[i];
-    }
+  CONSTCD14 string_literal(const CharT (&a)[N]) NOEXCEPT : p_{} {
+    for (std::size_t i = 0; i < N; ++i) p_[i] = a[i];
+  }
 
-    template <class U = CharT,
-              class = typename std::enable_if<(1 < sizeof(U))>::type>
-    CONSTCD14 string_literal(const char(&a)[N]) NOEXCEPT
-        : p_{}
-    {
-        for (std::size_t i = 0; i < N; ++i)
-            p_[i] = a[i];
-    }
+  template <class U = CharT, class = typename std::enable_if<(1 < sizeof(U))>::type>
+  CONSTCD14 string_literal(const char (&a)[N]) NOEXCEPT : p_{} {
+    for (std::size_t i = 0; i < N; ++i) p_[i] = a[i];
+  }
 
-    template <class CharT2,
-              class = typename std::enable_if<!std::is_same<CharT2, CharT>::value>::type>
-    CONSTCD14 string_literal(string_literal<CharT2, N> const& a) NOEXCEPT
-        : p_{}
-    {
-        for (std::size_t i = 0; i < N; ++i)
-            p_[i] = a[i];
-    }
+  template <class CharT2,
+            class = typename std::enable_if<!std::is_same<CharT2, CharT>::value>::type>
+  CONSTCD14 string_literal(string_literal<CharT2, N> const& a) NOEXCEPT : p_{} {
+    for (std::size_t i = 0; i < N; ++i) p_[i] = a[i];
+  }
 
-    CONSTCD11 const CharT* data() const NOEXCEPT {return p_;}
-    CONSTCD11 std::size_t size() const NOEXCEPT {return N-1;}
+  CONSTCD11 const CharT* data() const NOEXCEPT { return p_; }
+  CONSTCD11 std::size_t size() const NOEXCEPT { return N - 1; }
 
-    CONSTCD11 const_iterator begin() const NOEXCEPT {return p_;}
-    CONSTCD11 const_iterator end()   const NOEXCEPT {return p_ + N-1;}
+  CONSTCD11 const_iterator begin() const NOEXCEPT { return p_; }
+  CONSTCD11 const_iterator end() const NOEXCEPT { return p_ + N - 1; }
 
-    CONSTCD11 CharT const& operator[](std::size_t n) const NOEXCEPT
-    {
-        return p_[n];
-    }
+  CONSTCD11 CharT const& operator[](std::size_t n) const NOEXCEPT { return p_[n]; }
 
-    template <class Traits>
-    friend
-    std::basic_ostream<CharT, Traits>&
-    operator<<(std::basic_ostream<CharT, Traits>& os, const string_literal& s)
-    {
-        return os << s.p_;
-    }
+  template <class Traits>
+  friend std::basic_ostream<CharT, Traits>& operator<<(
+      std::basic_ostream<CharT, Traits>& os, const string_literal& s) {
+    return os << s.p_;
+  }
 
-    template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
-    friend
-    CONSTCD14
-    string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
-                   N1 + N2 - 1>
-    operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT;
+  template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
+  friend CONSTCD14 string_literal<
+      typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
+      N1 + N2 - 1>
+  operator+(const string_literal<CharT1, N1>& x,
+            const string_literal<CharT2, N2>& y) NOEXCEPT;
 };
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 3>
-operator+(const string_literal<CharT, 2>& x, const string_literal<CharT, 2>& y) NOEXCEPT
-{
+CONSTCD11 inline string_literal<CharT, 3> operator+(
+    const string_literal<CharT, 2>& x, const string_literal<CharT, 2>& y) NOEXCEPT {
   return string_literal<CharT, 3>(x[0], y[0]);
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 4>
-operator+(const string_literal<CharT, 3>& x, const string_literal<CharT, 2>& y) NOEXCEPT
-{
+CONSTCD11 inline string_literal<CharT, 4> operator+(
+    const string_literal<CharT, 3>& x, const string_literal<CharT, 2>& y) NOEXCEPT {
   return string_literal<CharT, 4>(x[0], x[1], y[0]);
 }
 
 template <class CharT1, class CharT2, std::size_t N1, std::size_t N2>
-CONSTCD14
-inline
-string_literal<typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
-               N1 + N2 - 1>
-operator+(const string_literal<CharT1, N1>& x, const string_literal<CharT2, N2>& y) NOEXCEPT
-{
-    using CT = typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type;
+CONSTCD14 inline string_literal<
+    typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type,
+    N1 + N2 - 1>
+operator+(const string_literal<CharT1, N1>& x,
+          const string_literal<CharT2, N2>& y) NOEXCEPT {
+  using CT =
+      typename std::conditional<sizeof(CharT2) <= sizeof(CharT1), CharT1, CharT2>::type;
 
-    string_literal<CT, N1 + N2 - 1> r;
-    std::size_t i = 0;
-    for (; i < N1-1; ++i)
-       r.p_[i] = CT(x.p_[i]);
-    for (std::size_t j = 0; j < N2; ++j, ++i)
-       r.p_[i] = CT(y.p_[j]);
+  string_literal<CT, N1 + N2 - 1> r;
+  std::size_t i = 0;
+  for (; i < N1 - 1; ++i) r.p_[i] = CT(x.p_[i]);
+  for (std::size_t j = 0; j < N2; ++j, ++i) r.p_[i] = CT(y.p_[j]);
 
-    return r;
+  return r;
 }
 
-
 template <class CharT, class Traits, class Alloc, std::size_t N>
-inline
-std::basic_string<CharT, Traits, Alloc>
-operator+(std::basic_string<CharT, Traits, Alloc> x, const string_literal<CharT, N>& y)
-{
-    x.append(y.data(), y.size());
-    return x;
+inline std::basic_string<CharT, Traits, Alloc> operator+(
+    std::basic_string<CharT, Traits, Alloc> x, const string_literal<CharT, N>& y) {
+  x.append(y.data(), y.size());
+  return x;
 }
 
-#if __cplusplus >= 201402  && (!defined(__EDG_VERSION__) || __EDG_VERSION__ > 411) \
-                           && (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150)
+#if __cplusplus >= 201402 && (!defined(__EDG_VERSION__) || __EDG_VERSION__ > 411) && \
+    (!defined(__SUNPRO_CC) || __SUNPRO_CC > 0x5150)
 
 template <class CharT,
-          class = std::enable_if_t<std::is_same<CharT, char>{} ||
-                                   std::is_same<CharT, wchar_t>{} ||
-                                   std::is_same<CharT, char16_t>{} ||
-                                   std::is_same<CharT, char32_t>{}>>
-CONSTCD14
-inline
-string_literal<CharT, 2>
-msl(CharT c) NOEXCEPT
-{
-    return string_literal<CharT, 2>{c};
+          class = std::enable_if_t<
+              std::is_same<CharT, char>{} || std::is_same<CharT, wchar_t>{} ||
+              std::is_same<CharT, char16_t>{} || std::is_same<CharT, char32_t>{}>>
+CONSTCD14 inline string_literal<CharT, 2> msl(CharT c) NOEXCEPT {
+  return string_literal<CharT, 2>{c};
 }
 
 CONSTCD14
-inline
-std::size_t
-to_string_len(std::intmax_t i)
-{
-    std::size_t r = 0;
-    do
-    {
-        i /= 10;
-        ++r;
-    } while (i > 0);
-    return r;
+inline std::size_t to_string_len(std::intmax_t i) {
+  std::size_t r = 0;
+  do {
+    i /= 10;
+    ++r;
+  } while (i > 0);
+  return r;
 }
 
 template <std::intmax_t N>
-CONSTCD14
-inline
-std::enable_if_t
-<
-    N < 10,
-    string_literal<char, to_string_len(N)+1>
->
-msl() NOEXCEPT
-{
-    return msl(char(N % 10 + '0'));
+    CONSTCD14 inline std::enable_if_t <
+    N<10, string_literal<char, to_string_len(N) + 1>> msl() NOEXCEPT {
+  return msl(char(N % 10 + '0'));
 }
 
 template <std::intmax_t N>
-CONSTCD14
-inline
-std::enable_if_t
-<
-    10 <= N,
-    string_literal<char, to_string_len(N)+1>
->
-msl() NOEXCEPT
-{
-    return msl<N/10>() + msl(char(N % 10 + '0'));
+CONSTCD14 inline std::enable_if_t<10 <= N, string_literal<char, to_string_len(N) + 1>>
+msl() NOEXCEPT {
+  return msl<N / 10>() + msl(char(N % 10 + '0'));
 }
 
 template <class CharT, std::intmax_t N, std::intmax_t D>
-CONSTCD14
-inline
-std::enable_if_t
-<
+CONSTCD14 inline std::enable_if_t<
     std::ratio<N, D>::type::den != 1,
     string_literal<CharT, to_string_len(std::ratio<N, D>::type::num) +
-                          to_string_len(std::ratio<N, D>::type::den) + 4>
->
-msl(std::ratio<N, D>) NOEXCEPT
-{
-    using R = typename std::ratio<N, D>::type;
-    return msl(CharT{'['}) + msl<R::num>() + msl(CharT{'/'}) +
-                             msl<R::den>() + msl(CharT{']'});
+                              to_string_len(std::ratio<N, D>::type::den) + 4>>
+msl(std::ratio<N, D>) NOEXCEPT {
+  using R = typename std::ratio<N, D>::type;
+  return msl(CharT{'['}) + msl<R::num>() + msl(CharT{'/'}) + msl<R::den>() +
+         msl(CharT{']'});
 }
 
 template <class CharT, std::intmax_t N, std::intmax_t D>
-CONSTCD14
-inline
-std::enable_if_t
-<
+CONSTCD14 inline std::enable_if_t<
     std::ratio<N, D>::type::den == 1,
-    string_literal<CharT, to_string_len(std::ratio<N, D>::type::num) + 3>
->
-msl(std::ratio<N, D>) NOEXCEPT
-{
-    using R = typename std::ratio<N, D>::type;
-    return msl(CharT{'['}) + msl<R::num>() + msl(CharT{']'});
+    string_literal<CharT, to_string_len(std::ratio<N, D>::type::num) + 3>>
+msl(std::ratio<N, D>) NOEXCEPT {
+  using R = typename std::ratio<N, D>::type;
+  return msl(CharT{'['}) + msl<R::num>() + msl(CharT{']'});
 }
 
+#else  // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__
+       // <= 411)
 
-#else  // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411)
-
-inline
-std::string
-to_string(std::uint64_t x)
-{
-    return std::to_string(x);
-}
+inline std::string to_string(std::uint64_t x) { return std::to_string(x); }
 
 template <class CharT>
-inline
-std::basic_string<CharT>
-to_string(std::uint64_t x)
-{
-    auto y = std::to_string(x);
-    return std::basic_string<CharT>(y.begin(), y.end());
+inline std::basic_string<CharT> to_string(std::uint64_t x) {
+  auto y = std::to_string(x);
+  return std::basic_string<CharT>(y.begin(), y.end());
 }
 
 template <class CharT, std::intmax_t N, std::intmax_t D>
-inline
-typename std::enable_if
-<
-    std::ratio<N, D>::type::den != 1,
-    std::basic_string<CharT>
->::type
-msl(std::ratio<N, D>)
-{
-    using R = typename std::ratio<N, D>::type;
-    return std::basic_string<CharT>(1, '[') + to_string<CharT>(R::num) + CharT{'/'} +
-                                              to_string<CharT>(R::den) + CharT{']'};
+inline typename std::enable_if<std::ratio<N, D>::type::den != 1,
+                               std::basic_string<CharT>>::type
+msl(std::ratio<N, D>) {
+  using R = typename std::ratio<N, D>::type;
+  return std::basic_string<CharT>(1, '[') + to_string<CharT>(R::num) + CharT{'/'} +
+         to_string<CharT>(R::den) + CharT{']'};
 }
 
 template <class CharT, std::intmax_t N, std::intmax_t D>
-inline
-typename std::enable_if
-<
-    std::ratio<N, D>::type::den == 1,
-    std::basic_string<CharT>
->::type
-msl(std::ratio<N, D>)
-{
-    using R = typename std::ratio<N, D>::type;
-    return std::basic_string<CharT>(1, '[') + to_string<CharT>(R::num) + CharT{']'};
+inline typename std::enable_if<std::ratio<N, D>::type::den == 1,
+                               std::basic_string<CharT>>::type
+msl(std::ratio<N, D>) {
+  using R = typename std::ratio<N, D>::type;
+  return std::basic_string<CharT>(1, '[') + to_string<CharT>(R::num) + CharT{']'};
 }
 
-#endif  // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__ <= 411)
+#endif  // __cplusplus < 201402 || (defined(__EDG_VERSION__) && __EDG_VERSION__
+        // <= 411)
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::atto) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'a'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::atto) NOEXCEPT {
+  return string_literal<CharT, 2>{'a'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::femto) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'f'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::femto) NOEXCEPT {
+  return string_literal<CharT, 2>{'f'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::pico) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'p'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::pico) NOEXCEPT {
+  return string_literal<CharT, 2>{'p'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::nano) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'n'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::nano) NOEXCEPT {
+  return string_literal<CharT, 2>{'n'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-typename std::enable_if
-<
-    std::is_same<CharT, char>::value,
-    string_literal<char, 3>
->::type
-msl(std::micro) NOEXCEPT
-{
-    return string_literal<char, 3>{'\xC2', '\xB5'};
+CONSTCD11 inline typename std::enable_if<std::is_same<CharT, char>::value,
+                                         string_literal<char, 3>>::type
+msl(std::micro) NOEXCEPT {
+  return string_literal<char, 3>{'\xC2', '\xB5'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-typename std::enable_if
-<
-    !std::is_same<CharT, char>::value,
-    string_literal<CharT, 2>
->::type
-msl(std::micro) NOEXCEPT
-{
-    return string_literal<CharT, 2>{CharT{static_cast<unsigned char>('\xB5')}};
+CONSTCD11 inline typename std::enable_if<!std::is_same<CharT, char>::value,
+                                         string_literal<CharT, 2>>::type
+msl(std::micro) NOEXCEPT {
+  return string_literal<CharT, 2>{CharT{static_cast<unsigned char>('\xB5')}};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::milli) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'m'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::milli) NOEXCEPT {
+  return string_literal<CharT, 2>{'m'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::centi) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'c'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::centi) NOEXCEPT {
+  return string_literal<CharT, 2>{'c'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 3>
-msl(std::deca) NOEXCEPT
-{
-    return string_literal<CharT, 3>{'d', 'a'};
+CONSTCD11 inline string_literal<CharT, 3> msl(std::deca) NOEXCEPT {
+  return string_literal<CharT, 3>{'d', 'a'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::deci) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'d'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::deci) NOEXCEPT {
+  return string_literal<CharT, 2>{'d'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::hecto) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'h'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::hecto) NOEXCEPT {
+  return string_literal<CharT, 2>{'h'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::kilo) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'k'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::kilo) NOEXCEPT {
+  return string_literal<CharT, 2>{'k'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::mega) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'M'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::mega) NOEXCEPT {
+  return string_literal<CharT, 2>{'M'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::giga) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'G'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::giga) NOEXCEPT {
+  return string_literal<CharT, 2>{'G'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::tera) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'T'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::tera) NOEXCEPT {
+  return string_literal<CharT, 2>{'T'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::peta) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'P'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::peta) NOEXCEPT {
+  return string_literal<CharT, 2>{'P'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-msl(std::exa) NOEXCEPT
-{
-    return string_literal<CharT, 2>{'E'};
+CONSTCD11 inline string_literal<CharT, 2> msl(std::exa) NOEXCEPT {
+  return string_literal<CharT, 2>{'E'};
 }
 
 template <class CharT, class Period>
-CONSTCD11
-inline
-auto
-get_units(Period p)
- -> decltype(msl<CharT>(p) + string_literal<CharT, 2>{'s'})
-{
-    return msl<CharT>(p) + string_literal<CharT, 2>{'s'};
+CONSTCD11 inline auto get_units(Period p)
+    -> decltype(msl<CharT>(p) + string_literal<CharT, 2>{'s'}) {
+  return msl<CharT>(p) + string_literal<CharT, 2>{'s'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-get_units(std::ratio<1>)
-{
-    return string_literal<CharT, 2>{'s'};
+CONSTCD11 inline string_literal<CharT, 2> get_units(std::ratio<1>) {
+  return string_literal<CharT, 2>{'s'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-get_units(std::ratio<3600>)
-{
-    return string_literal<CharT, 2>{'h'};
+CONSTCD11 inline string_literal<CharT, 2> get_units(std::ratio<3600>) {
+  return string_literal<CharT, 2>{'h'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 4>
-get_units(std::ratio<60>)
-{
-    return string_literal<CharT, 4>{'m', 'i', 'n'};
+CONSTCD11 inline string_literal<CharT, 4> get_units(std::ratio<60>) {
+  return string_literal<CharT, 4>{'m', 'i', 'n'};
 }
 
 template <class CharT>
-CONSTCD11
-inline
-string_literal<CharT, 2>
-get_units(std::ratio<86400>)
-{
-    return string_literal<CharT, 2>{'d'};
+CONSTCD11 inline string_literal<CharT, 2> get_units(std::ratio<86400>) {
+  return string_literal<CharT, 2>{'d'};
 }
 
 template <class CharT, class Traits = std::char_traits<CharT>>
 struct make_string;
 
 template <>
-struct make_string<char>
-{
-    template <class Rep>
-    static
-    std::string
-    from(Rep n)
-    {
-        return std::to_string(n);
-    }
+struct make_string<char> {
+  template <class Rep>
+  static std::string from(Rep n) {
+    return std::to_string(n);
+  }
 };
 
 template <class Traits>
-struct make_string<char, Traits>
-{
-    template <class Rep>
-    static
-    std::basic_string<char, Traits>
-    from(Rep n)
-    {
-        auto s = std::to_string(n);
-        return std::basic_string<char, Traits>(s.begin(), s.end());
-    }
+struct make_string<char, Traits> {
+  template <class Rep>
+  static std::basic_string<char, Traits> from(Rep n) {
+    auto s = std::to_string(n);
+    return std::basic_string<char, Traits>(s.begin(), s.end());
+  }
 };
 
 template <>
-struct make_string<wchar_t>
-{
-    template <class Rep>
-    static
-    std::wstring
-    from(Rep n)
-    {
-        return std::to_wstring(n);
-    }
+struct make_string<wchar_t> {
+  template <class Rep>
+  static std::wstring from(Rep n) {
+    return std::to_wstring(n);
+  }
 };
 
 template <class Traits>
-struct make_string<wchar_t, Traits>
-{
-    template <class Rep>
-    static
-    std::basic_string<wchar_t, Traits>
-    from(Rep n)
-    {
-        auto s = std::to_wstring(n);
-        return std::basic_string<wchar_t, Traits>(s.begin(), s.end());
-    }
+struct make_string<wchar_t, Traits> {
+  template <class Rep>
+  static std::basic_string<wchar_t, Traits> from(Rep n) {
+    auto s = std::to_wstring(n);
+    return std::basic_string<wchar_t, Traits>(s.begin(), s.end());
+  }
 };
 
 }  // namespace detail
@@ -4563,254 +3449,171 @@ struct make_string<wchar_t, Traits>
 CONSTDATA year nanyear{-32768};
 
 template <class Duration>
-struct fields
-{
-    year_month_day        ymd{nanyear/0/0};
-    weekday               wd{8u};
-    hh_mm_ss<Duration>    tod{};
-    bool                  has_tod = false;
-
-    fields() = default;
-
-    fields(year_month_day ymd_) : ymd(ymd_) {}
-    fields(weekday wd_) : wd(wd_) {}
-    fields(hh_mm_ss<Duration> tod_) : tod(tod_), has_tod(true) {}
-
-    fields(year_month_day ymd_, weekday wd_) : ymd(ymd_), wd(wd_) {}
-    fields(year_month_day ymd_, hh_mm_ss<Duration> tod_) : ymd(ymd_), tod(tod_),
-                                                           has_tod(true) {}
-
-    fields(weekday wd_, hh_mm_ss<Duration> tod_) : wd(wd_), tod(tod_), has_tod(true) {}
-
-    fields(year_month_day ymd_, weekday wd_, hh_mm_ss<Duration> tod_)
-        : ymd(ymd_)
-        , wd(wd_)
-        , tod(tod_)
-        , has_tod(true)
-        {}
+struct fields {
+  year_month_day ymd{nanyear / 0 / 0};
+  weekday wd{8u};
+  hh_mm_ss<Duration> tod{};
+  bool has_tod = false;
+
+  fields() = default;
+
+  fields(year_month_day ymd_) : ymd(ymd_) {}
+  fields(weekday wd_) : wd(wd_) {}
+  fields(hh_mm_ss<Duration> tod_) : tod(tod_), has_tod(true) {}
+
+  fields(year_month_day ymd_, weekday wd_) : ymd(ymd_), wd(wd_) {}
+  fields(year_month_day ymd_, hh_mm_ss<Duration> tod_)
+      : ymd(ymd_), tod(tod_), has_tod(true) {}
+
+  fields(weekday wd_, hh_mm_ss<Duration> tod_) : wd(wd_), tod(tod_), has_tod(true) {}
+
+  fields(year_month_day ymd_, weekday wd_, hh_mm_ss<Duration> tod_)
+      : ymd(ymd_), wd(wd_), tod(tod_), has_tod(true) {}
 };
 
-namespace detail
-{
+namespace detail {
 
 template <class CharT, class Traits, class Duration>
-unsigned
-extract_weekday(std::basic_ostream<CharT, Traits>& os, const fields<Duration>& fds)
-{
-    if (!fds.ymd.ok() && !fds.wd.ok())
-    {
-        // fds does not contain a valid weekday
-        os.setstate(std::ios::failbit);
-        return 8;
-    }
-    weekday wd;
-    if (fds.ymd.ok())
-    {
-        wd = weekday{sys_days(fds.ymd)};
-        if (fds.wd.ok() && wd != fds.wd)
-        {
-            // fds.ymd and fds.wd are inconsistent
-            os.setstate(std::ios::failbit);
-            return 8;
-        }
+unsigned extract_weekday(std::basic_ostream<CharT, Traits>& os,
+                         const fields<Duration>& fds) {
+  if (!fds.ymd.ok() && !fds.wd.ok()) {
+    // fds does not contain a valid weekday
+    os.setstate(std::ios::failbit);
+    return 8;
+  }
+  weekday wd;
+  if (fds.ymd.ok()) {
+    wd = weekday{sys_days(fds.ymd)};
+    if (fds.wd.ok() && wd != fds.wd) {
+      // fds.ymd and fds.wd are inconsistent
+      os.setstate(std::ios::failbit);
+      return 8;
     }
-    else
-        wd = fds.wd;
-    return static_cast<unsigned>((wd - Sunday).count());
+  } else
+    wd = fds.wd;
+  return static_cast<unsigned>((wd - Sunday).count());
 }
 
 template <class CharT, class Traits, class Duration>
-unsigned
-extract_month(std::basic_ostream<CharT, Traits>& os, const fields<Duration>& fds)
-{
-    if (!fds.ymd.month().ok())
-    {
-        // fds does not contain a valid month
-        os.setstate(std::ios::failbit);
-        return 0;
-    }
-    return static_cast<unsigned>(fds.ymd.month());
+unsigned extract_month(std::basic_ostream<CharT, Traits>& os,
+                       const fields<Duration>& fds) {
+  if (!fds.ymd.month().ok()) {
+    // fds does not contain a valid month
+    os.setstate(std::ios::failbit);
+    return 0;
+  }
+  return static_cast<unsigned>(fds.ymd.month());
 }
 
 }  // namespace detail
 
 #if ONLY_C_LOCALE
 
-namespace detail
-{
-
-inline
-std::pair<const std::string*, const std::string*>
-weekday_names()
-{
-    static const std::string nm[] =
-    {
-        "Sunday",
-        "Monday",
-        "Tuesday",
-        "Wednesday",
-        "Thursday",
-        "Friday",
-        "Saturday",
-        "Sun",
-        "Mon",
-        "Tue",
-        "Wed",
-        "Thu",
-        "Fri",
-        "Sat"
-    };
-    return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0]));
-}
-
-inline
-std::pair<const std::string*, const std::string*>
-month_names()
-{
-    static const std::string nm[] =
-    {
-        "January",
-        "February",
-        "March",
-        "April",
-        "May",
-        "June",
-        "July",
-        "August",
-        "September",
-        "October",
-        "November",
-        "December",
-        "Jan",
-        "Feb",
-        "Mar",
-        "Apr",
-        "May",
-        "Jun",
-        "Jul",
-        "Aug",
-        "Sep",
-        "Oct",
-        "Nov",
-        "Dec"
-    };
-    return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0]));
-}
-
-inline
-std::pair<const std::string*, const std::string*>
-ampm_names()
-{
-    static const std::string nm[] =
-    {
-        "AM",
-        "PM"
-    };
-    return std::make_pair(nm, nm+sizeof(nm)/sizeof(nm[0]));
+namespace detail {
+
+inline std::pair<const std::string*, const std::string*> weekday_names() {
+  static const std::string nm[] = {
+      "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
+      "Sun",    "Mon",    "Tue",     "Wed",       "Thu",      "Fri",    "Sat"};
+  return std::make_pair(nm, nm + sizeof(nm) / sizeof(nm[0]));
+}
+
+inline std::pair<const std::string*, const std::string*> month_names() {
+  static const std::string nm[] = {
+      "January",   "February", "March",    "April",    "May", "June", "July", "August",
+      "September", "October",  "November", "December", "Jan", "Feb",  "Mar",  "Apr",
+      "May",       "Jun",      "Jul",      "Aug",      "Sep", "Oct",  "Nov",  "Dec"};
+  return std::make_pair(nm, nm + sizeof(nm) / sizeof(nm[0]));
+}
+
+inline std::pair<const std::string*, const std::string*> ampm_names() {
+  static const std::string nm[] = {"AM", "PM"};
+  return std::make_pair(nm, nm + sizeof(nm) / sizeof(nm[0]));
 }
 
 template <class CharT, class Traits, class FwdIter>
-FwdIter
-scan_keyword(std::basic_istream<CharT, Traits>& is, FwdIter kb, FwdIter ke)
-{
-    size_t nkw = static_cast<size_t>(std::distance(kb, ke));
-    const unsigned char doesnt_match = '\0';
-    const unsigned char might_match = '\1';
-    const unsigned char does_match = '\2';
-    unsigned char statbuf[100];
-    unsigned char* status = statbuf;
-    std::unique_ptr<unsigned char, void(*)(void*)> stat_hold(0, free);
-    if (nkw > sizeof(statbuf))
-    {
-        status = (unsigned char*)std::malloc(nkw);
-        if (status == nullptr)
-            throw std::bad_alloc();
-        stat_hold.reset(status);
+FwdIter scan_keyword(std::basic_istream<CharT, Traits>& is, FwdIter kb, FwdIter ke) {
+  size_t nkw = static_cast<size_t>(std::distance(kb, ke));
+  const unsigned char doesnt_match = '\0';
+  const unsigned char might_match = '\1';
+  const unsigned char does_match = '\2';
+  unsigned char statbuf[100];
+  unsigned char* status = statbuf;
+  std::unique_ptr<unsigned char, void (*)(void*)> stat_hold(0, free);
+  if (nkw > sizeof(statbuf)) {
+    status = (unsigned char*)std::malloc(nkw);
+    if (status == nullptr) throw std::bad_alloc();
+    stat_hold.reset(status);
+  }
+  size_t n_might_match = nkw;  // At this point, any keyword might match
+  size_t n_does_match = 0;     // but none of them definitely do
+  // Initialize all statuses to might_match, except for "" (empty) keywords,
+  // which are does_match
+  unsigned char* st = status;
+  for (auto ky = kb; ky != ke; ++ky, ++st) {
+    if (!ky->empty())
+      *st = might_match;
+    else {
+      *st = does_match;
+      --n_might_match;
+      ++n_does_match;
     }
-    size_t n_might_match = nkw;  // At this point, any keyword might match
-    size_t n_does_match = 0;     // but none of them definitely do
-    // Initialize all statuses to might_match, except for "" keywords are does_match
-    unsigned char* st = status;
-    for (auto ky = kb; ky != ke; ++ky, ++st)
-    {
-        if (!ky->empty())
-            *st = might_match;
-        else
-        {
+  }
+  // While there might be a match, test keywords against the next CharT
+  for (size_t indx = 0; is && n_might_match > 0; ++indx) {
+    // Peek at the next CharT but don't consume it
+    auto ic = is.peek();
+    if (ic == EOF) {
+      is.setstate(std::ios::eofbit);
+      break;
+    }
+    auto c = static_cast<char>(toupper(ic));
+    bool consume = false;
+    // For each keyword which might match, see if the indx character is c
+    // If a match is found, consume c
+    // If a match is found, and that is the last character in the keyword,
+    //    then that keyword matches.
+    // If the keyword doesn't match this character, then change the keyword
+    //    to doesn't match
+    st = status;
+    for (auto ky = kb; ky != ke; ++ky, ++st) {
+      if (*st == might_match) {
+        if (c == static_cast<char>(toupper((*ky)[indx]))) {
+          consume = true;
+          if (ky->size() == indx + 1) {
             *st = does_match;
             --n_might_match;
             ++n_does_match;
+          }
+        } else {
+          *st = doesnt_match;
+          --n_might_match;
         }
+      }
     }
-    // While there might be a match, test keywords against the next CharT
-    for (size_t indx = 0; is && n_might_match > 0; ++indx)
-    {
-        // Peek at the next CharT but don't consume it
-        auto ic = is.peek();
-        if (ic == EOF)
-        {
-            is.setstate(std::ios::eofbit);
-            break;
-        }
-        auto c = static_cast<char>(toupper(ic));
-        bool consume = false;
-        // For each keyword which might match, see if the indx character is c
-        // If a match if found, consume c
-        // If a match is found, and that is the last character in the keyword,
-        //    then that keyword matches.
-        // If the keyword doesn't match this character, then change the keyword
-        //    to doesn't match
+    // consume if we matched a character
+    if (consume) {
+      (void)is.get();
+      // If we consumed a character and there might be a matched keyword that
+      //   was marked matched on a previous iteration, then such keywords
+      //   are now marked as not matching.
+      if (n_might_match + n_does_match > 1) {
         st = status;
-        for (auto ky = kb; ky != ke; ++ky, ++st)
-        {
-            if (*st == might_match)
-            {
-                if (c == static_cast<char>(toupper((*ky)[indx])))
-                {
-                    consume = true;
-                    if (ky->size() == indx+1)
-                    {
-                        *st = does_match;
-                        --n_might_match;
-                        ++n_does_match;
-                    }
-                }
-                else
-                {
-                    *st = doesnt_match;
-                    --n_might_match;
-                }
-            }
-        }
-        // consume if we matched a character
-        if (consume)
-        {
-            (void)is.get();
-            // If we consumed a character and there might be a matched keyword that
-            //   was marked matched on a previous iteration, then such keywords
-            //   are now marked as not matching.
-            if (n_might_match + n_does_match > 1)
-            {
-                st = status;
-                for (auto ky = kb; ky != ke; ++ky, ++st)
-                {
-                    if (*st == does_match && ky->size() != indx+1)
-                    {
-                        *st = doesnt_match;
-                        --n_does_match;
-                    }
-                }
-            }
+        for (auto ky = kb; ky != ke; ++ky, ++st) {
+          if (*st == does_match && ky->size() != indx + 1) {
+            *st = doesnt_match;
+            --n_does_match;
+          }
         }
+      }
     }
-    // We've exited the loop because we hit eof and/or we have no more "might matches".
-    // Return the first matching result
-    for (st = status; kb != ke; ++kb, ++st)
-        if (*st == does_match)
-            break;
-    if (kb == ke)
-        is.setstate(std::ios::failbit);
-    return kb;
+  }
+  // We've exited the loop because we hit eof and/or we have no more "might
+  // matches". Return the first matching result
+  for (st = status; kb != ke; ++kb, ++st)
+    if (*st == does_match) break;
+  if (kb == ke) is.setstate(std::ios::failbit);
+  return kb;
 }
 
 }  // namespace detail
@@ -4818,3143 +3621,2566 @@ scan_keyword(std::basic_istream<CharT, Traits>& is, FwdIter kb, FwdIter ke)
 #endif  // ONLY_C_LOCALE
 
 template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
-          const fields<Duration>& fds, const std::string* abbrev,
-          const std::chrono::seconds* offset_sec)
-{
+std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                             const CharT* fmt,
+                                             const fields<Duration>& fds,
+                                             const std::string* abbrev,
+                                             const std::chrono::seconds* offset_sec) {
 #if ONLY_C_LOCALE
-    using detail::weekday_names;
-    using detail::month_names;
-    using detail::ampm_names;
+  using detail::ampm_names;
+  using detail::month_names;
+  using detail::weekday_names;
 #endif
-    using detail::save_ostream;
-    using detail::get_units;
-    using detail::extract_weekday;
-    using detail::extract_month;
-    using std::ios;
-    using std::chrono::duration_cast;
-    using std::chrono::seconds;
-    using std::chrono::minutes;
-    using std::chrono::hours;
-    date::detail::save_ostream<CharT, Traits> ss(os);
-    os.fill(' ');
-    os.flags(std::ios::skipws | std::ios::dec);
-    os.width(0);
-    tm tm{};
-    bool insert_negative = fds.has_tod && fds.tod.to_duration() < Duration::zero();
+  using detail::extract_month;
+  using detail::extract_weekday;
+  using detail::get_units;
+  using detail::save_ostream;
+  using std::ios;
+  using std::chrono::duration_cast;
+  using std::chrono::hours;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+  date::detail::save_ostream<CharT, Traits> ss(os);
+  os.fill(' ');
+  os.flags(std::ios::skipws | std::ios::dec);
+  os.width(0);
+  tm tm{};
+  bool insert_negative = fds.has_tod && fds.tod.to_duration() < Duration::zero();
 #if !ONLY_C_LOCALE
-    auto& facet = std::use_facet<std::time_put<CharT>>(os.getloc());
+  auto& facet = std::use_facet<std::time_put<CharT>>(os.getloc());
 #endif
-    const CharT* command = nullptr;
-    CharT modified = CharT{};
-    for (; *fmt; ++fmt)
-    {
-        switch (*fmt)
-        {
-        case 'a':
-        case 'A':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
-                    if (os.fail())
-                        return os;
+  const CharT* command = nullptr;
+  CharT modified = CharT{};
+  for (; *fmt; ++fmt) {
+    switch (*fmt) {
+      case 'a':
+      case 'A':
+        if (command) {
+          if (modified == CharT{}) {
+            tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+            if (os.fail()) return os;
 #if !ONLY_C_LOCALE
-                    const CharT f[] = {'%', *fmt};
-                    facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-#else  // ONLY_C_LOCALE
-                    os << weekday_names().first[tm.tm_wday+7*(*fmt == 'a')];
+            const CharT f[] = {'%', *fmt};
+            facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+#else   // ONLY_C_LOCALE
+            os << weekday_names().first[tm.tm_wday + 7 * (*fmt == 'a')];
 #endif  // ONLY_C_LOCALE
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'b':
-        case 'B':
-        case 'h':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    tm.tm_mon = static_cast<int>(extract_month(os, fds)) - 1;
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'b':
+      case 'B':
+      case 'h':
+        if (command) {
+          if (modified == CharT{}) {
+            tm.tm_mon = static_cast<int>(extract_month(os, fds)) - 1;
 #if !ONLY_C_LOCALE
-                    const CharT f[] = {'%', *fmt};
-                    facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-#else  // ONLY_C_LOCALE
-                    os << month_names().first[tm.tm_mon+12*(*fmt != 'B')];
+            const CharT f[] = {'%', *fmt};
+            facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+#else   // ONLY_C_LOCALE
+            os << month_names().first[tm.tm_mon + 12 * (*fmt != 'B')];
 #endif  // ONLY_C_LOCALE
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'c':
-        case 'x':
-            if (command)
-            {
-                if (modified == CharT{'O'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.ymd.ok())
-                        os.setstate(std::ios::failbit);
-                    if (*fmt == 'c' && !fds.has_tod)
-                        os.setstate(std::ios::failbit);
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'c':
+      case 'x':
+        if (command) {
+          if (modified == CharT{'O'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.ymd.ok()) os.setstate(std::ios::failbit);
+            if (*fmt == 'c' && !fds.has_tod) os.setstate(std::ios::failbit);
 #if !ONLY_C_LOCALE
-                    tm = std::tm{};
-                    auto const& ymd = fds.ymd;
-                    auto ld = local_days(ymd);
-                    if (*fmt == 'c')
-                    {
-                        tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
-                        tm.tm_min = static_cast<int>(fds.tod.minutes().count());
-                        tm.tm_hour = static_cast<int>(fds.tod.hours().count());
-                    }
-                    tm.tm_mday = static_cast<int>(static_cast<unsigned>(ymd.day()));
-                    tm.tm_mon = static_cast<int>(extract_month(os, fds) - 1);
-                    tm.tm_year = static_cast<int>(ymd.year()) - 1900;
-                    tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
-                    if (os.fail())
-                        return os;
-                    tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
-                    CharT f[3] = {'%'};
-                    auto fe = std::begin(f) + 1;
-                    if (modified == CharT{'E'})
-                        *fe++ = modified;
-                    *fe++ = *fmt;
-                    facet.put(os, os, os.fill(), &tm, std::begin(f), fe);
-#else  // ONLY_C_LOCALE
-                    if (*fmt == 'c')
-                    {
-                        auto wd = static_cast<int>(extract_weekday(os, fds));
-                        os << weekday_names().first[static_cast<unsigned>(wd)+7]
-                           << ' ';
-                        os << month_names().first[extract_month(os, fds)-1+12] << ' ';
-                        auto d = static_cast<int>(static_cast<unsigned>(fds.ymd.day()));
-                        if (d < 10)
-                            os << ' ';
-                        os << d << ' '
-                           << make_time(duration_cast<seconds>(fds.tod.to_duration()))
-                           << ' ' << fds.ymd.year();
-
-                    }
-                    else  // *fmt == 'x'
-                    {
-                        auto const& ymd = fds.ymd;
-                        save_ostream<CharT, Traits> _(os);
-                        os.fill('0');
-                        os.flags(std::ios::dec | std::ios::right);
-                        os.width(2);
-                        os << static_cast<unsigned>(ymd.month()) << CharT{'/'};
-                        os.width(2);
-                        os << static_cast<unsigned>(ymd.day()) << CharT{'/'};
-                        os.width(2);
-                        os << static_cast<int>(ymd.year()) % 100;
-                    }
-#endif  // ONLY_C_LOCALE
-                }
-                command = nullptr;
-                modified = CharT{};
+            tm = std::tm{};
+            auto const& ymd = fds.ymd;
+            auto ld = local_days(ymd);
+            if (*fmt == 'c') {
+              tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
+              tm.tm_min = static_cast<int>(fds.tod.minutes().count());
+              tm.tm_hour = static_cast<int>(fds.tod.hours().count());
             }
-            else
-                os << *fmt;
-            break;
-        case 'C':
-            if (command)
+            tm.tm_mday = static_cast<int>(static_cast<unsigned>(ymd.day()));
+            tm.tm_mon = static_cast<int>(extract_month(os, fds) - 1);
+            tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+            tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+            if (os.fail()) return os;
+            tm.tm_yday = static_cast<int>((ld - local_days(ymd.year() / 1 / 1)).count());
+            CharT f[3] = {'%'};
+            auto fe = std::begin(f) + 1;
+            if (modified == CharT{'E'}) *fe++ = modified;
+            *fe++ = *fmt;
+            facet.put(os, os, os.fill(), &tm, std::begin(f), fe);
+#else   // ONLY_C_LOCALE
+            if (*fmt == 'c') {
+              auto wd = static_cast<int>(extract_weekday(os, fds));
+              os << weekday_names().first[static_cast<unsigned>(wd) + 7] << ' ';
+              os << month_names().first[extract_month(os, fds) - 1 + 12] << ' ';
+              auto d = static_cast<int>(static_cast<unsigned>(fds.ymd.day()));
+              if (d < 10) os << ' ';
+              os << d << ' ' << make_time(duration_cast<seconds>(fds.tod.to_duration()))
+                 << ' ' << fds.ymd.year();
+
+            } else  // *fmt == 'x'
             {
-                if (modified == CharT{'O'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.ymd.year().ok())
-                        os.setstate(std::ios::failbit);
-                    auto y = static_cast<int>(fds.ymd.year());
-#if !ONLY_C_LOCALE
-                    if (modified == CharT{})
-#endif
-                    {
-                        save_ostream<CharT, Traits> _(os);
-                        os.fill('0');
-                        os.flags(std::ios::dec | std::ios::right);
-                        if (y >= 0)
-                        {
-                            os.width(2);
-                            os << y/100;
-                        }
-                        else
-                        {
-                            os << CharT{'-'};
-                            os.width(2);
-                            os << -(y-99)/100;
-                        }
-                    }
+              auto const& ymd = fds.ymd;
+              save_ostream<CharT, Traits> _(os);
+              os.fill('0');
+              os.flags(std::ios::dec | std::ios::right);
+              os.width(2);
+              os << static_cast<unsigned>(ymd.month()) << CharT{'/'};
+              os.width(2);
+              os << static_cast<unsigned>(ymd.day()) << CharT{'/'};
+              os.width(2);
+              os << static_cast<int>(ymd.year()) % 100;
+            }
+#endif  // ONLY_C_LOCALE
+          }
+          command = nullptr;
+          modified = CharT{};
+        } else
+          os << *fmt;
+        break;
+      case 'C':
+        if (command) {
+          if (modified == CharT{'O'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.ymd.year().ok()) os.setstate(std::ios::failbit);
+            auto y = static_cast<int>(fds.ymd.year());
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'E'})
-                    {
-                        tm.tm_year = y - 1900;
-                        CharT f[3] = {'%', 'E', 'C'};
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+            if (modified == CharT{})
 #endif
-                }
-                command = nullptr;
-                modified = CharT{};
-            }
-            else
-                os << *fmt;
-            break;
-        case 'd':
-        case 'e':
-            if (command)
             {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.ymd.day().ok())
-                        os.setstate(std::ios::failbit);
-                    auto d = static_cast<int>(static_cast<unsigned>(fds.ymd.day()));
+              save_ostream<CharT, Traits> _(os);
+              os.fill('0');
+              os.flags(std::ios::dec | std::ios::right);
+              if (y >= 0) {
+                os.width(2);
+                os << y / 100;
+              } else {
+                os << CharT{'-'};
+                os.width(2);
+                os << -(y - 99) / 100;
+              }
+            }
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'E'}) {
+              tm.tm_year = y - 1900;
+              CharT f[3] = {'%', 'E', 'C'};
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        save_ostream<CharT, Traits> _(os);
-                        if (*fmt == CharT{'d'})
-                            os.fill('0');
-                        else
-                            os.fill(' ');
-                        os.flags(std::ios::dec | std::ios::right);
-                        os.width(2);
-                        os << d;
-                    }
+          }
+          command = nullptr;
+          modified = CharT{};
+        } else
+          os << *fmt;
+        break;
+      case 'd':
+      case 'e':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.ymd.day().ok()) os.setstate(std::ios::failbit);
+            auto d = static_cast<int>(static_cast<unsigned>(fds.ymd.day()));
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        tm.tm_mday = d;
-                        CharT f[3] = {'%', 'O', *fmt};
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+            if (modified == CharT{})
 #endif
-                }
-                command = nullptr;
-                modified = CharT{};
-            }
-            else
-                os << *fmt;
-            break;
-        case 'D':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    if (!fds.ymd.ok())
-                        os.setstate(std::ios::failbit);
-                    auto const& ymd = fds.ymd;
-                    save_ostream<CharT, Traits> _(os);
-                    os.fill('0');
-                    os.flags(std::ios::dec | std::ios::right);
-                    os.width(2);
-                    os << static_cast<unsigned>(ymd.month()) << CharT{'/'};
-                    os.width(2);
-                    os << static_cast<unsigned>(ymd.day()) << CharT{'/'};
-                    os.width(2);
-                    os << static_cast<int>(ymd.year()) % 100;
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'F':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    if (!fds.ymd.ok())
-                        os.setstate(std::ios::failbit);
-                    auto const& ymd = fds.ymd;
-                    save_ostream<CharT, Traits> _(os);
-                    os.fill('0');
-                    os.flags(std::ios::dec | std::ios::right);
-                    os.width(4);
-                    os << static_cast<int>(ymd.year()) << CharT{'-'};
-                    os.width(2);
-                    os << static_cast<unsigned>(ymd.month()) << CharT{'-'};
-                    os.width(2);
-                    os << static_cast<unsigned>(ymd.day());
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'g':
-        case 'G':
-            if (command)
             {
-                if (modified == CharT{})
-                {
-                    if (!fds.ymd.ok())
-                        os.setstate(std::ios::failbit);
-                    auto ld = local_days(fds.ymd);
-                    auto y = year_month_day{ld + days{3}}.year();
-                    auto start = local_days((y-years{1})/December/Thursday[last]) +
-                                 (Monday-Thursday);
-                    if (ld < start)
-                        --y;
-                    if (*fmt == CharT{'G'})
-                        os << y;
-                    else
-                    {
-                        save_ostream<CharT, Traits> _(os);
-                        os.fill('0');
-                        os.flags(std::ios::dec | std::ios::right);
-                        os.width(2);
-                        os << std::abs(static_cast<int>(y)) % 100;
-                    }
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
+              save_ostream<CharT, Traits> _(os);
+              if (*fmt == CharT{'d'})
+                os.fill('0');
+              else
+                os.fill(' ');
+              os.flags(std::ios::dec | std::ios::right);
+              os.width(2);
+              os << d;
             }
-            else
-                os << *fmt;
-            break;
-        case 'H':
-        case 'I':
-            if (command)
-            {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
-                    if (insert_negative)
-                    {
-                        os << '-';
-                        insert_negative = false;
-                    }
-                    auto hms = fds.tod;
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'O'}) {
+              tm.tm_mday = d;
+              CharT f[3] = {'%', 'O', *fmt};
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        auto h = *fmt == CharT{'I'} ? make12(hms.hours()) : hms.hours();
-                        if (h < hours{10})
-                            os << CharT{'0'};
-                        os << h.count();
-                    }
+          }
+          command = nullptr;
+          modified = CharT{};
+        } else
+          os << *fmt;
+        break;
+      case 'D':
+        if (command) {
+          if (modified == CharT{}) {
+            if (!fds.ymd.ok()) os.setstate(std::ios::failbit);
+            auto const& ymd = fds.ymd;
+            save_ostream<CharT, Traits> _(os);
+            os.fill('0');
+            os.flags(std::ios::dec | std::ios::right);
+            os.width(2);
+            os << static_cast<unsigned>(ymd.month()) << CharT{'/'};
+            os.width(2);
+            os << static_cast<unsigned>(ymd.day()) << CharT{'/'};
+            os.width(2);
+            os << static_cast<int>(ymd.year()) % 100;
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'F':
+        if (command) {
+          if (modified == CharT{}) {
+            if (!fds.ymd.ok()) os.setstate(std::ios::failbit);
+            auto const& ymd = fds.ymd;
+            save_ostream<CharT, Traits> _(os);
+            os.fill('0');
+            os.flags(std::ios::dec | std::ios::right);
+            os.width(4);
+            os << static_cast<int>(ymd.year()) << CharT{'-'};
+            os.width(2);
+            os << static_cast<unsigned>(ymd.month()) << CharT{'-'};
+            os.width(2);
+            os << static_cast<unsigned>(ymd.day());
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'g':
+      case 'G':
+        if (command) {
+          if (modified == CharT{}) {
+            if (!fds.ymd.ok()) os.setstate(std::ios::failbit);
+            auto ld = local_days(fds.ymd);
+            auto y = year_month_day{ld + days{3}}.year();
+            auto start = local_days((y - years{1}) / December / Thursday[last]) +
+                         (Monday - Thursday);
+            if (ld < start) --y;
+            if (*fmt == CharT{'G'})
+              os << y;
+            else {
+              save_ostream<CharT, Traits> _(os);
+              os.fill('0');
+              os.flags(std::ios::dec | std::ios::right);
+              os.width(2);
+              os << std::abs(static_cast<int>(y)) % 100;
+            }
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'H':
+      case 'I':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
+            if (insert_negative) {
+              os << '-';
+              insert_negative = false;
+            }
+            auto hms = fds.tod;
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        tm.tm_hour = static_cast<int>(hms.hours().count());
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+            if (modified == CharT{})
 #endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'j':
-            if (command)
             {
-                if (modified == CharT{})
-                {
-                    if (fds.ymd.ok() || fds.has_tod)
-                    {
-                        days doy;
-                        if (fds.ymd.ok())
-                        {
-                            auto ld = local_days(fds.ymd);
-                            auto y = fds.ymd.year();
-                            doy = ld - local_days(y/January/1) + days{1};
-                        }
-                        else
-                        {
-                            doy = duration_cast<days>(fds.tod.to_duration());
-                        }
-                        save_ostream<CharT, Traits> _(os);
-                        os.fill('0');
-                        os.flags(std::ios::dec | std::ios::right);
-                        os.width(3);
-                        os << doy.count();
-                    }
-                    else
-                    {
-                        os.setstate(std::ios::failbit);
-                    }
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
+              auto h = *fmt == CharT{'I'} ? make12(hms.hours()) : hms.hours();
+              if (h < hours{10}) os << CharT{'0'};
+              os << h.count();
             }
-            else
-                os << *fmt;
-            break;
-        case 'm':
-            if (command)
-            {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.ymd.month().ok())
-                        os.setstate(std::ios::failbit);
-                    auto m = static_cast<unsigned>(fds.ymd.month());
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'O'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              tm.tm_hour = static_cast<int>(hms.hours().count());
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        if (m < 10)
-                            os << CharT{'0'};
-                        os << m;
-                    }
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'j':
+        if (command) {
+          if (modified == CharT{}) {
+            if (fds.ymd.ok() || fds.has_tod) {
+              days doy;
+              if (fds.ymd.ok()) {
+                auto ld = local_days(fds.ymd);
+                auto y = fds.ymd.year();
+                doy = ld - local_days(y / January / 1) + days{1};
+              } else {
+                doy = duration_cast<days>(fds.tod.to_duration());
+              }
+              save_ostream<CharT, Traits> _(os);
+              os.fill('0');
+              os.flags(std::ios::dec | std::ios::right);
+              os.width(3);
+              os << doy.count();
+            } else {
+              os.setstate(std::ios::failbit);
+            }
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'm':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.ymd.month().ok()) os.setstate(std::ios::failbit);
+            auto m = static_cast<unsigned>(fds.ymd.month());
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        tm.tm_mon = static_cast<int>(m-1);
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+            if (modified == CharT{})
 #endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'M':
-            if (command)
             {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
-                    if (insert_negative)
-                    {
-                        os << '-';
-                        insert_negative = false;
-                    }
+              if (m < 10) os << CharT{'0'};
+              os << m;
+            }
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'O'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              tm.tm_mon = static_cast<int>(m - 1);
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        if (fds.tod.minutes() < minutes{10})
-                            os << CharT{'0'};
-                        os << fds.tod.minutes().count();
-                    }
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'M':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
+            if (insert_negative) {
+              os << '-';
+              insert_negative = false;
+            }
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        tm.tm_min = static_cast<int>(fds.tod.minutes().count());
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+            if (modified == CharT{})
 #endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'n':
-            if (command)
             {
-                if (modified == CharT{})
-                    os << CharT{'\n'};
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
+              if (fds.tod.minutes() < minutes{10}) os << CharT{'0'};
+              os << fds.tod.minutes().count();
             }
-            else
-                os << *fmt;
-            break;
-        case 'p':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
 #if !ONLY_C_LOCALE
-                    const CharT f[] = {'%', *fmt};
-                    tm.tm_hour = static_cast<int>(fds.tod.hours().count());
-                    facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-#else
-                    if (is_am(fds.tod.hours()))
-                        os << ampm_names().first[0];
-                    else
-                        os << ampm_names().first[1];
-#endif
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                }
-                modified = CharT{};
-                command = nullptr;
+            else if (modified == CharT{'O'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              tm.tm_min = static_cast<int>(fds.tod.minutes().count());
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
             }
+#endif
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'n':
+        if (command) {
+          if (modified == CharT{})
+            os << CharT{'\n'};
+          else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'p':
+        if (command) {
+          if (modified == CharT{}) {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
+#if !ONLY_C_LOCALE
+            const CharT f[] = {'%', *fmt};
+            tm.tm_hour = static_cast<int>(fds.tod.hours().count());
+            facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+#else
+            if (is_am(fds.tod.hours()))
+              os << ampm_names().first[0];
             else
-                os << *fmt;
-            break;
-        case 'Q':
-        case 'q':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
-                    auto d = fds.tod.to_duration();
-                    if (*fmt == 'q')
-                        os << get_units<CharT>(typename decltype(d)::period::type{});
-                    else
-                        os << d.count();
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
+              os << ampm_names().first[1];
+#endif
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'Q':
+      case 'q':
+        if (command) {
+          if (modified == CharT{}) {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
+            auto d = fds.tod.to_duration();
+            if (*fmt == 'q')
+              os << get_units<CharT>(typename decltype(d)::period::type{});
             else
-                os << *fmt;
-            break;
-        case 'r':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
+              os << d.count();
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'r':
+        if (command) {
+          if (modified == CharT{}) {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
 #if !ONLY_C_LOCALE
-                    const CharT f[] = {'%', *fmt};
-                    tm.tm_hour = static_cast<int>(fds.tod.hours().count());
-                    tm.tm_min = static_cast<int>(fds.tod.minutes().count());
-                    tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
-                    facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            const CharT f[] = {'%', *fmt};
+            tm.tm_hour = static_cast<int>(fds.tod.hours().count());
+            tm.tm_min = static_cast<int>(fds.tod.minutes().count());
+            tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
+            facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
 #else
-                    hh_mm_ss<seconds> tod(duration_cast<seconds>(fds.tod.to_duration()));
-                    save_ostream<CharT, Traits> _(os);
-                    os.fill('0');
-                    os.width(2);
-                    os << make12(tod.hours()).count() << CharT{':'};
-                    os.width(2);
-                    os << tod.minutes().count() << CharT{':'};
-                    os.width(2);
-                    os << tod.seconds().count() << CharT{' '};
-                    if (is_am(tod.hours()))
-                        os << ampm_names().first[0];
-                    else
-                        os << ampm_names().first[1];
+            hh_mm_ss<seconds> tod(duration_cast<seconds>(fds.tod.to_duration()));
+            save_ostream<CharT, Traits> _(os);
+            os.fill('0');
+            os.width(2);
+            os << make12(tod.hours()).count() << CharT{':'};
+            os.width(2);
+            os << tod.minutes().count() << CharT{':'};
+            os.width(2);
+            os << tod.seconds().count() << CharT{' '};
+            if (is_am(tod.hours()))
+              os << ampm_names().first[0];
+            else
+              os << ampm_names().first[1];
 #endif
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                }
-                modified = CharT{};
-                command = nullptr;
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'R':
+        if (command) {
+          if (modified == CharT{}) {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
+            if (fds.tod.hours() < hours{10}) os << CharT{'0'};
+            os << fds.tod.hours().count() << CharT{':'};
+            if (fds.tod.minutes() < minutes{10}) os << CharT{'0'};
+            os << fds.tod.minutes().count();
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'S':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
+            if (insert_negative) {
+              os << '-';
+              insert_negative = false;
             }
-            else
-                os << *fmt;
-            break;
-        case 'R':
-            if (command)
+#if !ONLY_C_LOCALE
+            if (modified == CharT{})
+#endif
             {
-                if (modified == CharT{})
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
-                    if (fds.tod.hours() < hours{10})
-                        os << CharT{'0'};
-                    os << fds.tod.hours().count() << CharT{':'};
-                    if (fds.tod.minutes() < minutes{10})
-                        os << CharT{'0'};
-                    os << fds.tod.minutes().count();
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
+              os << fds.tod.s_;
             }
-            else
-                os << *fmt;
-            break;
-        case 'S':
-            if (command)
-            {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
-                    if (insert_negative)
-                    {
-                        os << '-';
-                        insert_negative = false;
-                    }
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'O'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              tm.tm_sec = static_cast<int>(fds.tod.s_.seconds().count());
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        os << fds.tod.s_;
-                    }
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 't':
+        if (command) {
+          if (modified == CharT{})
+            os << CharT{'\t'};
+          else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'T':
+        if (command) {
+          if (modified == CharT{}) {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
+            os << fds.tod;
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'u':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            auto wd = extract_weekday(os, fds);
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        tm.tm_sec = static_cast<int>(fds.tod.s_.seconds().count());
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+            if (modified == CharT{})
 #endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 't':
-            if (command)
-            {
-                if (modified == CharT{})
-                    os << CharT{'\t'};
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'T':
-            if (command)
             {
-                if (modified == CharT{})
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
-                    os << fds.tod;
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
+              os << (wd != 0 ? wd : 7u);
             }
-            else
-                os << *fmt;
-            break;
-        case 'u':
-            if (command)
-            {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    auto wd = extract_weekday(os, fds);
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'O'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              tm.tm_wday = static_cast<int>(wd);
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        os << (wd != 0 ? wd : 7u);
-                    }
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'U':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            auto const& ymd = fds.ymd;
+            if (!ymd.ok()) os.setstate(std::ios::failbit);
+            auto ld = local_days(ymd);
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        tm.tm_wday = static_cast<int>(wd);
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+            if (modified == CharT{})
 #endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'U':
-            if (command)
             {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    auto const& ymd = fds.ymd;
-                    if (!ymd.ok())
-                        os.setstate(std::ios::failbit);
-                    auto ld = local_days(ymd);
+              auto st = local_days(Sunday[1] / January / ymd.year());
+              if (ld < st)
+                os << CharT{'0'} << CharT{'0'};
+              else {
+                auto wn = duration_cast<weeks>(ld - st).count() + 1;
+                if (wn < 10) os << CharT{'0'};
+                os << wn;
+              }
+            }
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'O'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+              tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+              if (os.fail()) return os;
+              tm.tm_yday =
+                  static_cast<int>((ld - local_days(ymd.year() / 1 / 1)).count());
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        auto st = local_days(Sunday[1]/January/ymd.year());
-                        if (ld < st)
-                            os << CharT{'0'} << CharT{'0'};
-                        else
-                        {
-                            auto wn = duration_cast<weeks>(ld - st).count() + 1;
-                            if (wn < 10)
-                                os << CharT{'0'};
-                            os << wn;
-                        }
-                   }
- #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        tm.tm_year = static_cast<int>(ymd.year()) - 1900;
-                        tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
-                        if (os.fail())
-                            return os;
-                        tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'V':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.ymd.ok()) os.setstate(std::ios::failbit);
+            auto ld = local_days(fds.ymd);
+#if !ONLY_C_LOCALE
+            if (modified == CharT{})
 #endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'V':
-            if (command)
             {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.ymd.ok())
-                        os.setstate(std::ios::failbit);
-                    auto ld = local_days(fds.ymd);
+              auto y = year_month_day{ld + days{3}}.year();
+              auto st =
+                  local_days((y - years{1}) / 12 / Thursday[last]) + (Monday - Thursday);
+              if (ld < st) {
+                --y;
+                st = local_days((y - years{1}) / 12 / Thursday[last]) +
+                     (Monday - Thursday);
+              }
+              auto wn = duration_cast<weeks>(ld - st).count() + 1;
+              if (wn < 10) os << CharT{'0'};
+              os << wn;
+            }
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'O'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              auto const& ymd = fds.ymd;
+              tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+              tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+              if (os.fail()) return os;
+              tm.tm_yday =
+                  static_cast<int>((ld - local_days(ymd.year() / 1 / 1)).count());
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        auto y = year_month_day{ld + days{3}}.year();
-                        auto st = local_days((y-years{1})/12/Thursday[last]) +
-                                  (Monday-Thursday);
-                        if (ld < st)
-                        {
-                            --y;
-                            st = local_days((y - years{1})/12/Thursday[last]) +
-                                 (Monday-Thursday);
-                        }
-                        auto wn = duration_cast<weeks>(ld - st).count() + 1;
-                        if (wn < 10)
-                            os << CharT{'0'};
-                        os << wn;
-                    }
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'w':
+        if (command) {
+          auto wd = extract_weekday(os, fds);
+          if (os.fail()) return os;
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        auto const& ymd = fds.ymd;
-                        tm.tm_year = static_cast<int>(ymd.year()) - 1900;
-                        tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
-                        if (os.fail())
-                            return os;
-                        tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+          if (modified == CharT{})
+#else
+          if (modified != CharT{'E'})
 #endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'w':
-            if (command)
-            {
-                auto wd = extract_weekday(os, fds);
-                if (os.fail())
-                    return os;
+          {
+            os << wd;
+          }
 #if !ONLY_C_LOCALE
-                if (modified == CharT{})
-#else
-                if (modified != CharT{'E'})
+          else if (modified == CharT{'O'}) {
+            const CharT f[] = {'%', modified, *fmt};
+            tm.tm_wday = static_cast<int>(wd);
+            facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+          }
 #endif
-                {
-                    os << wd;
-                }
+          else {
+            os << CharT{'%'} << modified << *fmt;
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'W':
+        if (command) {
+          if (modified == CharT{'E'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            auto const& ymd = fds.ymd;
+            if (!ymd.ok()) os.setstate(std::ios::failbit);
+            auto ld = local_days(ymd);
 #if !ONLY_C_LOCALE
-                else if (modified == CharT{'O'})
-                {
-                    const CharT f[] = {'%', modified, *fmt};
-                    tm.tm_wday = static_cast<int>(wd);
-                    facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                }
+            if (modified == CharT{})
 #endif
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'W':
-            if (command)
             {
-                if (modified == CharT{'E'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    auto const& ymd = fds.ymd;
-                    if (!ymd.ok())
-                        os.setstate(std::ios::failbit);
-                    auto ld = local_days(ymd);
+              auto st = local_days(Monday[1] / January / ymd.year());
+              if (ld < st)
+                os << CharT{'0'} << CharT{'0'};
+              else {
+                auto wn = duration_cast<weeks>(ld - st).count() + 1;
+                if (wn < 10) os << CharT{'0'};
+                os << wn;
+              }
+            }
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            else if (modified == CharT{'O'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              tm.tm_year = static_cast<int>(ymd.year()) - 1900;
+              tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
+              if (os.fail()) return os;
+              tm.tm_yday =
+                  static_cast<int>((ld - local_days(ymd.year() / 1 / 1)).count());
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+            }
 #endif
-                    {
-                        auto st = local_days(Monday[1]/January/ymd.year());
-                        if (ld < st)
-                            os << CharT{'0'} << CharT{'0'};
-                        else
-                        {
-                            auto wn = duration_cast<weeks>(ld - st).count() + 1;
-                            if (wn < 10)
-                                os << CharT{'0'};
-                            os << wn;
-                        }
-                    }
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'X':
+        if (command) {
+          if (modified == CharT{'O'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.has_tod) os.setstate(std::ios::failbit);
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        tm.tm_year = static_cast<int>(ymd.year()) - 1900;
-                        tm.tm_wday = static_cast<int>(extract_weekday(os, fds));
-                        if (os.fail())
-                            return os;
-                        tm.tm_yday = static_cast<int>((ld - local_days(ymd.year()/1/1)).count());
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
-#endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'X':
-            if (command)
-            {
-                if (modified == CharT{'O'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.has_tod)
-                        os.setstate(std::ios::failbit);
-#if !ONLY_C_LOCALE
-                    tm = std::tm{};
-                    tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
-                    tm.tm_min = static_cast<int>(fds.tod.minutes().count());
-                    tm.tm_hour = static_cast<int>(fds.tod.hours().count());
-                    CharT f[3] = {'%'};
-                    auto fe = std::begin(f) + 1;
-                    if (modified == CharT{'E'})
-                        *fe++ = modified;
-                    *fe++ = *fmt;
-                    facet.put(os, os, os.fill(), &tm, std::begin(f), fe);
+            tm = std::tm{};
+            tm.tm_sec = static_cast<int>(fds.tod.seconds().count());
+            tm.tm_min = static_cast<int>(fds.tod.minutes().count());
+            tm.tm_hour = static_cast<int>(fds.tod.hours().count());
+            CharT f[3] = {'%'};
+            auto fe = std::begin(f) + 1;
+            if (modified == CharT{'E'}) *fe++ = modified;
+            *fe++ = *fmt;
+            facet.put(os, os, os.fill(), &tm, std::begin(f), fe);
 #else
-                    os << fds.tod;
-#endif
-                }
-                command = nullptr;
-                modified = CharT{};
-            }
-            else
-                os << *fmt;
-            break;
-        case 'y':
-            if (command)
-            {
-                if (!fds.ymd.year().ok())
-                    os.setstate(std::ios::failbit);
-                auto y = static_cast<int>(fds.ymd.year());
-#if !ONLY_C_LOCALE
-                if (modified == CharT{})
-                {
+            os << fds.tod;
 #endif
-                    y = std::abs(y) % 100;
-                    if (y < 10)
-                        os << CharT{'0'};
-                    os << y;
+          }
+          command = nullptr;
+          modified = CharT{};
+        } else
+          os << *fmt;
+        break;
+      case 'y':
+        if (command) {
+          if (!fds.ymd.year().ok()) os.setstate(std::ios::failbit);
+          auto y = static_cast<int>(fds.ymd.year());
 #if !ONLY_C_LOCALE
-                }
-                else
-                {
-                    const CharT f[] = {'%', modified, *fmt};
-                    tm.tm_year = y - 1900;
-                    facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                }
+          if (modified == CharT{}) {
 #endif
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'Y':
-            if (command)
-            {
-                if (modified == CharT{'O'})
-                    os << CharT{'%'} << modified << *fmt;
-                else
-                {
-                    if (!fds.ymd.year().ok())
-                        os.setstate(std::ios::failbit);
-                    auto y = fds.ymd.year();
+            y = std::abs(y) % 100;
+            if (y < 10) os << CharT{'0'};
+            os << y;
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+          } else {
+            const CharT f[] = {'%', modified, *fmt};
+            tm.tm_year = y - 1900;
+            facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
+          }
 #endif
-                    {
-                        os << y;
-                    }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'Y':
+        if (command) {
+          if (modified == CharT{'O'})
+            os << CharT{'%'} << modified << *fmt;
+          else {
+            if (!fds.ymd.year().ok()) os.setstate(std::ios::failbit);
+            auto y = fds.ymd.year();
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'E'})
-                    {
-                        const CharT f[] = {'%', modified, *fmt};
-                        tm.tm_year = static_cast<int>(y) - 1900;
-                        facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
-                    }
+            if (modified == CharT{})
 #endif
-                }
-                modified = CharT{};
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'z':
-            if (command)
-            {
-                if (offset_sec == nullptr)
-                {
-                    // Can not format %z with unknown offset
-                    os.setstate(ios::failbit);
-                    return os;
-                }
-                auto m = duration_cast<minutes>(*offset_sec);
-                auto neg = m < minutes{0};
-                m = date::abs(m);
-                auto h = duration_cast<hours>(m);
-                m -= h;
-                if (neg)
-                    os << CharT{'-'};
-                else
-                    os << CharT{'+'};
-                if (h < hours{10})
-                    os << CharT{'0'};
-                os << h.count();
-                if (modified != CharT{})
-                    os << CharT{':'};
-                if (m < minutes{10})
-                    os << CharT{'0'};
-                os << m.count();
-                command = nullptr;
-                modified = CharT{};
-            }
-            else
-                os << *fmt;
-            break;
-        case 'Z':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    if (abbrev == nullptr)
-                    {
-                        // Can not format %Z with unknown time_zone
-                        os.setstate(ios::failbit);
-                        return os;
-                    }
-                    for (auto c : *abbrev)
-                        os << CharT(c);
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    modified = CharT{};
-                }
-                command = nullptr;
-            }
-            else
-                os << *fmt;
-            break;
-        case 'E':
-        case 'O':
-            if (command)
-            {
-                if (modified == CharT{})
-                {
-                    modified = *fmt;
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << *fmt;
-                    command = nullptr;
-                    modified = CharT{};
-                }
-            }
-            else
-                os << *fmt;
-            break;
-        case '%':
-            if (command)
             {
-                if (modified == CharT{})
-                {
-                    os << CharT{'%'};
-                    command = nullptr;
-                }
-                else
-                {
-                    os << CharT{'%'} << modified << CharT{'%'};
-                    command = nullptr;
-                    modified = CharT{};
-                }
+              os << y;
             }
-            else
-                command = fmt;
-            break;
-        default:
-            if (command)
-            {
-                os << CharT{'%'};
-                command = nullptr;
+#if !ONLY_C_LOCALE
+            else if (modified == CharT{'E'}) {
+              const CharT f[] = {'%', modified, *fmt};
+              tm.tm_year = static_cast<int>(y) - 1900;
+              facet.put(os, os, os.fill(), &tm, std::begin(f), std::end(f));
             }
-            if (modified != CharT{})
-            {
-                os << modified;
-                modified = CharT{};
+#endif
+          }
+          modified = CharT{};
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'z':
+        if (command) {
+          if (offset_sec == nullptr) {
+            // Can not format %z with unknown offset
+            os.setstate(ios::failbit);
+            return os;
+          }
+          auto m = duration_cast<minutes>(*offset_sec);
+          auto neg = m < minutes{0};
+          m = date::abs(m);
+          auto h = duration_cast<hours>(m);
+          m -= h;
+          if (neg)
+            os << CharT{'-'};
+          else
+            os << CharT{'+'};
+          if (h < hours{10}) os << CharT{'0'};
+          os << h.count();
+          if (modified != CharT{}) os << CharT{':'};
+          if (m < minutes{10}) os << CharT{'0'};
+          os << m.count();
+          command = nullptr;
+          modified = CharT{};
+        } else
+          os << *fmt;
+        break;
+      case 'Z':
+        if (command) {
+          if (modified == CharT{}) {
+            if (abbrev == nullptr) {
+              // Can not format %Z with unknown time_zone
+              os.setstate(ios::failbit);
+              return os;
             }
-            os << *fmt;
-            break;
+            for (auto c : *abbrev) os << CharT(c);
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            modified = CharT{};
+          }
+          command = nullptr;
+        } else
+          os << *fmt;
+        break;
+      case 'E':
+      case 'O':
+        if (command) {
+          if (modified == CharT{}) {
+            modified = *fmt;
+          } else {
+            os << CharT{'%'} << modified << *fmt;
+            command = nullptr;
+            modified = CharT{};
+          }
+        } else
+          os << *fmt;
+        break;
+      case '%':
+        if (command) {
+          if (modified == CharT{}) {
+            os << CharT{'%'};
+            command = nullptr;
+          } else {
+            os << CharT{'%'} << modified << CharT{'%'};
+            command = nullptr;
+            modified = CharT{};
+          }
+        } else
+          command = fmt;
+        break;
+      default:
+        if (command) {
+          os << CharT{'%'};
+          command = nullptr;
+        }
+        if (modified != CharT{}) {
+          os << modified;
+          modified = CharT{};
         }
+        os << *fmt;
+        break;
     }
-    if (command)
-        os << CharT{'%'};
-    if (modified != CharT{})
-        os << modified;
-    return os;
+  }
+  if (command) os << CharT{'%'};
+  if (modified != CharT{}) os << modified;
+  return os;
 }
 
 template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const year& y)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{y/0/0};
-    return to_stream(os, fmt, fds);
+inline std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                                    const CharT* fmt, const year& y) {
+  using CT = std::chrono::seconds;  // duration type used to instantiate fields<>
+  fields<CT> fds{y / 0 / 0};  // only the year is supplied; month and day are 0
+  return to_stream(os, fmt, fds);  // formatting is done by the fields<> overload
 }
 
 template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const month& m)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{m/0/nanyear};
-    return to_stream(os, fmt, fds);
+inline std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                                    const CharT* fmt, const month& m) {
+  using CT = std::chrono::seconds;  // duration type used to instantiate fields<>
+  fields<CT> fds{m / 0 / nanyear};  // month m, day 0, year nanyear
+  return to_stream(os, fmt, fds);  // formatting is done by the fields<> overload
 }
 
 template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const day& d)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{d/0/nanyear};
-    return to_stream(os, fmt, fds);
+inline std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                                    const CharT* fmt, const day& d) {
+  using CT = std::chrono::seconds;  // duration type used to instantiate fields<>
+  fields<CT> fds{d / 0 / nanyear};  // day d, month 0, year nanyear
+  return to_stream(os, fmt, fds);  // formatting is done by the fields<> overload
 }
 
 template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const weekday& wd)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{wd};
-    return to_stream(os, fmt, fds);
+inline std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                                    const CharT* fmt, const weekday& wd) {
+  using CT = std::chrono::seconds;  // duration type used to instantiate fields<>
+  fields<CT> fds{wd};  // fields built from the weekday alone
+  return to_stream(os, fmt, fds);  // formatting is done by the fields<> overload
 }
 
 template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const year_month& ym)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{ym/0};
-    return to_stream(os, fmt, fds);
+inline std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                                    const CharT* fmt,
+                                                    const year_month& ym) {
+  using CT = std::chrono::seconds;  // duration type used to instantiate fields<>
+  fields<CT> fds{ym / 0};  // year and month from ym; day is 0
+  return to_stream(os, fmt, fds);  // formatting is done by the fields<> overload
 }
 
 template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt, const month_day& md)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{md/nanyear};
-    return to_stream(os, fmt, fds);
+inline std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                                    const CharT* fmt,
+                                                    const month_day& md) {
+  using CT = std::chrono::seconds;  // duration type used to instantiate fields<>
+  fields<CT> fds{md / nanyear};  // month and day from md; year is nanyear
+  return to_stream(os, fmt, fds);  // formatting is done by the fields<> overload
 }
 
 template <class CharT, class Traits>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
-          const year_month_day& ymd)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{ymd};
-    return to_stream(os, fmt, fds);
+inline std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                                    const CharT* fmt,
+                                                    const year_month_day& ymd) {
+  using CT = std::chrono::seconds;  // duration type used to instantiate fields<>
+  fields<CT> fds{ymd};  // full date; no time-of-day is supplied
+  return to_stream(os, fmt, fds);  // formatting is done by the fields<> overload
 }
 
 template <class CharT, class Traits, class Rep, class Period>
-inline
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
-          const std::chrono::duration<Rep, Period>& d)
-{
-    using Duration = std::chrono::duration<Rep, Period>;
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-    fields<CT> fds{hh_mm_ss<CT>{d}};
-    return to_stream(os, fmt, fds);
+inline std::basic_ostream<CharT, Traits>& to_stream(
+    std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+    const std::chrono::duration<Rep, Period>& d) {
+  using Duration = std::chrono::duration<Rep, Period>;  // the caller's duration type
+  using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+  fields<CT> fds{hh_mm_ss<CT>{d}};  // d as an hh_mm_ss; no date is supplied
+  return to_stream(os, fmt, fds);  // formatting is done by the fields<> overload
 }
 
 template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
-          const local_time<Duration>& tp, const std::string* abbrev = nullptr,
-          const std::chrono::seconds* offset_sec = nullptr)
-{
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-    auto ld = floor<days>(tp);
-    fields<CT> fds{year_month_day{ld}, hh_mm_ss<CT>{tp-local_seconds{ld}}};
-    return to_stream(os, fmt, fds, abbrev, offset_sec);
+std::basic_ostream<CharT, Traits>& to_stream(
+    std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
+    const local_time<Duration>& tp, const std::string* abbrev = nullptr,
+    const std::chrono::seconds* offset_sec = nullptr) {
+  using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+  auto ld = floor<days>(tp);  // tp floored to days; tp - ld below is the time of day
+  fields<CT> fds{year_month_day{ld}, hh_mm_ss<CT>{tp - local_seconds{ld}}};
+  return to_stream(os, fmt, fds, abbrev, offset_sec);  // abbrev/offset_sec pass through
 }
 
 template <class CharT, class Traits, class Duration>
-std::basic_ostream<CharT, Traits>&
-to_stream(std::basic_ostream<CharT, Traits>& os, const CharT* fmt,
-          const sys_time<Duration>& tp)
-{
-    using std::chrono::seconds;
-    using CT = typename std::common_type<Duration, seconds>::type;
-    const std::string abbrev("UTC");
-    CONSTDATA seconds offset{0};
-    auto sd = floor<days>(tp);
-    fields<CT> fds{year_month_day{sd}, hh_mm_ss<CT>{tp-sys_seconds{sd}}};
-    return to_stream(os, fmt, fds, &abbrev, &offset);
+std::basic_ostream<CharT, Traits>& to_stream(std::basic_ostream<CharT, Traits>& os,
+                                             const CharT* fmt,
+                                             const sys_time<Duration>& tp) {
+  using std::chrono::seconds;
+  using CT = typename std::common_type<Duration, seconds>::type;
+  const std::string abbrev("UTC");  // sys_time is formatted with the abbreviation "UTC"
+  CONSTDATA seconds offset{0};  // and with a zero offset
+  auto sd = floor<days>(tp);  // tp floored to days; tp - sd below is the time of day
+  fields<CT> fds{year_month_day{sd}, hh_mm_ss<CT>{tp - sys_seconds{sd}}};
+  return to_stream(os, fmt, fds, &abbrev, &offset);  // fields<> overload does the work
 }
 
 // format
 
 template <class CharT, class Streamable>
-auto
-format(const std::locale& loc, const CharT* fmt, const Streamable& tp)
+auto format(const std::locale& loc, const CharT* fmt, const Streamable& tp)
     -> decltype(to_stream(std::declval<std::basic_ostream<CharT>&>(), fmt, tp),
-                std::basic_string<CharT>{})
-{
-    std::basic_ostringstream<CharT> os;
-    os.exceptions(std::ios::failbit | std::ios::badbit);
-    os.imbue(loc);
-    to_stream(os, fmt, tp);
-    return os.str();
+                std::basic_string<CharT>{}) {  // usable only if to_stream(os, fmt, tp) is valid
+  std::basic_ostringstream<CharT> os;  // scratch stream that collects the output
+  os.exceptions(std::ios::failbit | std::ios::badbit);  // setting either bit throws
+  os.imbue(loc);  // format with the caller-supplied locale
+  to_stream(os, fmt, tp);
+  return os.str();  // formatted text as a basic_string<CharT>
 }
 
 template <class CharT, class Streamable>
-auto
-format(const CharT* fmt, const Streamable& tp)
+auto format(const CharT* fmt, const Streamable& tp)
     -> decltype(to_stream(std::declval<std::basic_ostream<CharT>&>(), fmt, tp),
-                std::basic_string<CharT>{})
-{
-    std::basic_ostringstream<CharT> os;
-    os.exceptions(std::ios::failbit | std::ios::badbit);
-    to_stream(os, fmt, tp);
-    return os.str();
+                std::basic_string<CharT>{}) {  // usable only if to_stream(os, fmt, tp) is valid
+  std::basic_ostringstream<CharT> os;  // no imbue() here: the stream keeps its default locale
+  os.exceptions(std::ios::failbit | std::ios::badbit);  // setting either bit throws
+  to_stream(os, fmt, tp);
+  return os.str();  // formatted text as a basic_string<CharT>
 }
 
 template <class CharT, class Traits, class Alloc, class Streamable>
-auto
-format(const std::locale& loc, const std::basic_string<CharT, Traits, Alloc>& fmt,
-       const Streamable& tp)
-    -> decltype(to_stream(std::declval<std::basic_ostream<CharT, Traits>&>(), fmt.c_str(), tp),
-                std::basic_string<CharT, Traits, Alloc>{})
-{
-    std::basic_ostringstream<CharT, Traits, Alloc> os;
-    os.exceptions(std::ios::failbit | std::ios::badbit);
-    os.imbue(loc);
-    to_stream(os, fmt.c_str(), tp);
-    return os.str();
+auto format(const std::locale& loc, const std::basic_string<CharT, Traits, Alloc>& fmt,
+            const Streamable& tp)
+    -> decltype(to_stream(std::declval<std::basic_ostream<CharT, Traits>&>(), fmt.c_str(),
+                          tp),
+                std::basic_string<CharT, Traits, Alloc>{}) {
+  std::basic_ostringstream<CharT, Traits, Alloc> os;  // same traits/allocator as fmt
+  os.exceptions(std::ios::failbit | std::ios::badbit);  // setting either bit throws
+  os.imbue(loc);  // format with the caller-supplied locale
+  to_stream(os, fmt.c_str(), tp);  // to_stream takes the format as a C string
+  return os.str();
 }
 
 template <class CharT, class Traits, class Alloc, class Streamable>
-auto
-format(const std::basic_string<CharT, Traits, Alloc>& fmt, const Streamable& tp)
-    -> decltype(to_stream(std::declval<std::basic_ostream<CharT, Traits>&>(), fmt.c_str(), tp),
-                std::basic_string<CharT, Traits, Alloc>{})
-{
-    std::basic_ostringstream<CharT, Traits, Alloc> os;
-    os.exceptions(std::ios::failbit | std::ios::badbit);
-    to_stream(os, fmt.c_str(), tp);
-    return os.str();
+auto format(const std::basic_string<CharT, Traits, Alloc>& fmt, const Streamable& tp)
+    -> decltype(to_stream(std::declval<std::basic_ostream<CharT, Traits>&>(), fmt.c_str(),
+                          tp),
+                std::basic_string<CharT, Traits, Alloc>{}) {
+  std::basic_ostringstream<CharT, Traits, Alloc> os;  // same traits/allocator as fmt
+  os.exceptions(std::ios::failbit | std::ios::badbit);  // setting either bit throws
+  to_stream(os, fmt.c_str(), tp);  // to_stream takes the format as a C string
+  return os.str();
 }
 
 // parse
 
-namespace detail
-{
+namespace detail {
 
 template <class CharT, class Traits>
-bool
-read_char(std::basic_istream<CharT, Traits>& is, CharT fmt, std::ios::iostate& err)
-{
-    auto ic = is.get();
-    if (Traits::eq_int_type(ic, Traits::eof()) ||
-       !Traits::eq(Traits::to_char_type(ic), fmt))
-    {
-        err |= std::ios::failbit;
-        is.setstate(std::ios::failbit);
-        return false;
-    }
-    return true;
+bool read_char(std::basic_istream<CharT, Traits>& is, CharT fmt, std::ios::iostate& err) {
+  auto ic = is.get();  // extract one character; it must equal fmt
+  if (Traits::eq_int_type(ic, Traits::eof()) ||
+      !Traits::eq(Traits::to_char_type(ic), fmt)) {
+    err |= std::ios::failbit;  // report the failure through the caller's error mask
+    is.setstate(std::ios::failbit);  // and on the stream itself
+    return false;
+  }
+  return true;  // the extracted character matched fmt
 }
 
 template <class CharT, class Traits>
-unsigned
-read_unsigned(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
-{
-    unsigned x = 0;
-    unsigned count = 0;
-    while (true)
-    {
-        auto ic = is.peek();
-        if (Traits::eq_int_type(ic, Traits::eof()))
-            break;
-        auto c = static_cast<char>(Traits::to_char_type(ic));
-        if (!('0' <= c && c <= '9'))
-            break;
-        (void)is.get();
-        ++count;
-        x = 10*x + static_cast<unsigned>(c - '0');
-        if (count == M)
-            break;
-    }
-    if (count < m)
-        is.setstate(std::ios::failbit);
-    return x;
+unsigned read_unsigned(std::basic_istream<CharT, Traits>& is, unsigned m = 1,
+                       unsigned M = 10) {
+  unsigned x = 0;
+  unsigned count = 0;
+  while (true) {
+    auto ic = is.peek();
+    if (Traits::eq_int_type(ic, Traits::eof())) break;
+    auto c = static_cast<char>(Traits::to_char_type(ic));
+    if (!('0' <= c && c <= '9')) break;
+    (void)is.get();
+    ++count;
+    x = 10 * x + static_cast<unsigned>(c - '0');
+    if (count == M) break;
+  }
+  if (count < m) is.setstate(std::ios::failbit);
+  return x;
 }
 
 template <class CharT, class Traits>
-int
-read_signed(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
-{
-    auto ic = is.peek();
-    if (!Traits::eq_int_type(ic, Traits::eof()))
-    {
-        auto c = static_cast<char>(Traits::to_char_type(ic));
-        if (('0' <= c && c <= '9') || c == '-' || c == '+')
-        {
-            if (c == '-' || c == '+')
-                (void)is.get();
-            auto x = static_cast<int>(read_unsigned(is, std::max(m, 1u), M));
-            if (!is.fail())
-            {
-                if (c == '-')
-                    x = -x;
-                return x;
-            }
-        }
+int read_signed(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10) {
+  auto ic = is.peek();
+  if (!Traits::eq_int_type(ic, Traits::eof())) {
+    auto c = static_cast<char>(Traits::to_char_type(ic));
+    if (('0' <= c && c <= '9') || c == '-' || c == '+') {
+      if (c == '-' || c == '+') (void)is.get();
+      auto x = static_cast<int>(read_unsigned(is, std::max(m, 1u), M));
+      if (!is.fail()) {
+        if (c == '-') x = -x;
+        return x;
+      }
     }
-    if (m > 0)
-        is.setstate(std::ios::failbit);
-    return 0;
+  }
+  if (m > 0) is.setstate(std::ios::failbit);
+  return 0;
 }
 
 template <class CharT, class Traits>
-long double
-read_long_double(std::basic_istream<CharT, Traits>& is, unsigned m = 1, unsigned M = 10)
-{
-    unsigned count = 0;
-    auto decimal_point = Traits::to_int_type(
-        std::use_facet<std::numpunct<CharT>>(is.getloc()).decimal_point());
-    std::string buf;
-    while (true)
-    {
-        auto ic = is.peek();
-        if (Traits::eq_int_type(ic, Traits::eof()))
-            break;
-        if (Traits::eq_int_type(ic, decimal_point))
-        {
-            buf += '.';
-            decimal_point = Traits::eof();
-            is.get();
-        }
-        else
-        {
-            auto c = static_cast<char>(Traits::to_char_type(ic));
-            if (!('0' <= c && c <= '9'))
-                break;
-            buf += c;
-            (void)is.get();
-        }
-        if (++count == M)
-            break;
-    }
-    if (count < m)
-    {
-        is.setstate(std::ios::failbit);
-        return 0;
+long double read_long_double(std::basic_istream<CharT, Traits>& is, unsigned m = 1,
+                             unsigned M = 10) {
+  unsigned count = 0;
+  auto decimal_point = Traits::to_int_type(
+      std::use_facet<std::numpunct<CharT>>(is.getloc()).decimal_point());
+  std::string buf;
+  while (true) {
+    auto ic = is.peek();
+    if (Traits::eq_int_type(ic, Traits::eof())) break;
+    if (Traits::eq_int_type(ic, decimal_point)) {
+      buf += '.';
+      decimal_point = Traits::eof();
+      is.get();
+    } else {
+      auto c = static_cast<char>(Traits::to_char_type(ic));
+      if (!('0' <= c && c <= '9')) break;
+      buf += c;
+      (void)is.get();
     }
-    return std::stold(buf);
+    if (++count == M) break;
+  }
+  if (count < m) {
+    is.setstate(std::ios::failbit);
+    return 0;
+  }
+  return std::stold(buf);
 }
 
-struct rs
-{
-    int& i;
-    unsigned m;
-    unsigned M;
+struct rs {
+  int& i;
+  unsigned m;
+  unsigned M;
 };
 
-struct ru
-{
-    int& i;
-    unsigned m;
-    unsigned M;
+struct ru {
+  int& i;
+  unsigned m;
+  unsigned M;
 };
 
-struct rld
-{
-    long double& i;
-    unsigned m;
-    unsigned M;
+struct rld {
+  long double& i;
+  unsigned m;
+  unsigned M;
 };
 
 template <class CharT, class Traits>
-void
-read(std::basic_istream<CharT, Traits>&)
-{
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, CharT a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, rs a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, ru a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, int a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, rld a0, Args&& ...args);
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, CharT a0, Args&& ...args)
-{
-    // No-op if a0 == CharT{}
-    if (a0 != CharT{})
-    {
-        auto ic = is.peek();
-        if (Traits::eq_int_type(ic, Traits::eof()))
-        {
-            is.setstate(std::ios::failbit | std::ios::eofbit);
-            return;
-        }
-        if (!Traits::eq(Traits::to_char_type(ic), a0))
-        {
-            is.setstate(std::ios::failbit);
-            return;
-        }
-        (void)is.get();
+void read(std::basic_istream<CharT, Traits>&) {}
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, CharT a0, Args&&... args);
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, rs a0, Args&&... args);
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, ru a0, Args&&... args);
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, int a0, Args&&... args);
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, rld a0, Args&&... args);
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, CharT a0, Args&&... args) {
+  // No-op if a0 == CharT{}
+  if (a0 != CharT{}) {
+    auto ic = is.peek();
+    if (Traits::eq_int_type(ic, Traits::eof())) {
+      is.setstate(std::ios::failbit | std::ios::eofbit);
+      return;
     }
-    read(is, std::forward<Args>(args)...);
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, rs a0, Args&& ...args)
-{
-    auto x = read_signed(is, a0.m, a0.M);
-    if (is.fail())
-        return;
-    a0.i = x;
-    read(is, std::forward<Args>(args)...);
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, ru a0, Args&& ...args)
-{
-    auto x = read_unsigned(is, a0.m, a0.M);
-    if (is.fail())
-        return;
-    a0.i = static_cast<int>(x);
-    read(is, std::forward<Args>(args)...);
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, int a0, Args&& ...args)
-{
-    if (a0 != -1)
-    {
-        auto u = static_cast<unsigned>(a0);
-        CharT buf[std::numeric_limits<unsigned>::digits10+2] = {};
-        auto e = buf;
-        do
-        {
-            *e++ = static_cast<CharT>(CharT(u % 10) + CharT{'0'});
-            u /= 10;
-        } while (u > 0);
-        std::reverse(buf, e);
-        for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p)
-            read(is, *p);
+    if (!Traits::eq(Traits::to_char_type(ic), a0)) {
+      is.setstate(std::ios::failbit);
+      return;
     }
-    if (is.rdstate() == std::ios::goodbit)
-        read(is, std::forward<Args>(args)...);
-}
-
-template <class CharT, class Traits, class ...Args>
-void
-read(std::basic_istream<CharT, Traits>& is, rld a0, Args&& ...args)
-{
-    auto x = read_long_double(is, a0.m, a0.M);
-    if (is.fail())
-        return;
-    a0.i = x;
-    read(is, std::forward<Args>(args)...);
+    (void)is.get();
+  }
+  read(is, std::forward<Args>(args)...);
+}
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, rs a0, Args&&... args) {
+  auto x = read_signed(is, a0.m, a0.M);
+  if (is.fail()) return;
+  a0.i = x;
+  read(is, std::forward<Args>(args)...);
+}
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, ru a0, Args&&... args) {
+  auto x = read_unsigned(is, a0.m, a0.M);
+  if (is.fail()) return;
+  a0.i = static_cast<int>(x);
+  read(is, std::forward<Args>(args)...);
+}
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, int a0, Args&&... args) {
+  if (a0 != -1) {
+    auto u = static_cast<unsigned>(a0);
+    CharT buf[std::numeric_limits<unsigned>::digits10 + 2] = {};
+    auto e = buf;
+    do {
+      *e++ = static_cast<CharT>(CharT(u % 10) + CharT{'0'});
+      u /= 10;
+    } while (u > 0);
+    std::reverse(buf, e);
+    for (auto p = buf; p != e && is.rdstate() == std::ios::goodbit; ++p) read(is, *p);
+  }
+  if (is.rdstate() == std::ios::goodbit) read(is, std::forward<Args>(args)...);
+}
+
+template <class CharT, class Traits, class... Args>
+void read(std::basic_istream<CharT, Traits>& is, rld a0, Args&&... args) {
+  auto x = read_long_double(is, a0.m, a0.M);
+  if (is.fail()) return;
+  a0.i = x;
+  read(is, std::forward<Args>(args)...);
 }
 
 template <class T, class CharT, class Traits>
-inline
-void
-checked_set(T& value, T from, T not_a_value, std::basic_ios<CharT, Traits>& is)
-{
-    if (!is.fail())
-    {
-        if (value == not_a_value)
-            value = std::move(from);
-        else if (value != from)
-            is.setstate(std::ios::failbit);
-    }
+inline void checked_set(T& value, T from, T not_a_value,
+                        std::basic_ios<CharT, Traits>& is) {
+  if (!is.fail()) {
+    if (value == not_a_value)
+      value = std::move(from);
+    else if (value != from)
+      is.setstate(std::ios::failbit);
+  }
 }
 
-}  // namespace detail;
+}  // namespace detail
 
 template <class CharT, class Traits, class Duration, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            fields<Duration>& fds, std::basic_string<CharT, Traits, Alloc>* abbrev,
-            std::chrono::minutes* offset)
-{
-    using std::numeric_limits;
-    using std::ios;
-    using std::chrono::duration;
-    using std::chrono::duration_cast;
-    using std::chrono::seconds;
-    using std::chrono::minutes;
-    using std::chrono::hours;
-    typename std::basic_istream<CharT, Traits>::sentry ok{is, true};
-    if (ok)
-    {
-        date::detail::save_istream<CharT, Traits> ss(is);
-        is.fill(' ');
-        is.flags(std::ios::skipws | std::ios::dec);
-        is.width(0);
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, fields<Duration>& fds,
+    std::basic_string<CharT, Traits, Alloc>* abbrev, std::chrono::minutes* offset) {
+  using std::ios;
+  using std::numeric_limits;
+  using std::chrono::duration;
+  using std::chrono::duration_cast;
+  using std::chrono::hours;
+  using std::chrono::minutes;
+  using std::chrono::seconds;
+  typename std::basic_istream<CharT, Traits>::sentry ok{is, true};
+  if (ok) {
+    date::detail::save_istream<CharT, Traits> ss(is);
+    is.fill(' ');
+    is.flags(std::ios::skipws | std::ios::dec);
+    is.width(0);
 #if !ONLY_C_LOCALE
-        auto& f = std::use_facet<std::time_get<CharT>>(is.getloc());
-        std::tm tm{};
+    auto& f = std::use_facet<std::time_get<CharT>>(is.getloc());
+    std::tm tm{};
 #endif
-        const CharT* command = nullptr;
-        auto modified = CharT{};
-        auto width = -1;
-
-        CONSTDATA int not_a_year = numeric_limits<int>::min();
-        CONSTDATA int not_a_2digit_year = 100;
-        CONSTDATA int not_a_century = not_a_year / 100;
-        CONSTDATA int not_a_month = 0;
-        CONSTDATA int not_a_day = 0;
-        CONSTDATA int not_a_hour = numeric_limits<int>::min();
-        CONSTDATA int not_a_hour_12_value = 0;
-        CONSTDATA int not_a_minute = not_a_hour;
-        CONSTDATA Duration not_a_second = Duration::min();
-        CONSTDATA int not_a_doy = -1;
-        CONSTDATA int not_a_weekday = 8;
-        CONSTDATA int not_a_week_num = 100;
-        CONSTDATA int not_a_ampm = -1;
-        CONSTDATA minutes not_a_offset = minutes::min();
-
-        int Y = not_a_year;             // c, F, Y                   *
-        int y = not_a_2digit_year;      // D, x, y                   *
-        int g = not_a_2digit_year;      // g                         *
-        int G = not_a_year;             // G                         *
-        int C = not_a_century;          // C                         *
-        int m = not_a_month;            // b, B, h, m, c, D, F, x    *
-        int d = not_a_day;              // c, d, D, e, F, x          *
-        int j = not_a_doy;              // j                         *
-        int wd = not_a_weekday;         // a, A, u, w                *
-        int H = not_a_hour;             // c, H, R, T, X             *
-        int I = not_a_hour_12_value;    // I, r                      *
-        int p = not_a_ampm;             // p, r                      *
-        int M = not_a_minute;           // c, M, r, R, T, X          *
-        Duration s = not_a_second;      // c, r, S, T, X             *
-        int U = not_a_week_num;         // U                         *
-        int V = not_a_week_num;         // V                         *
-        int W = not_a_week_num;         // W                         *
-        std::basic_string<CharT, Traits, Alloc> temp_abbrev;  // Z   *
-        minutes temp_offset = not_a_offset;  // z                    *
-
-        using detail::read;
-        using detail::rs;
-        using detail::ru;
-        using detail::rld;
-        using detail::checked_set;
-        for (; *fmt && is.rdstate() == std::ios::goodbit; ++fmt)
-        {
-            switch (*fmt)
-            {
-            case 'a':
-            case 'A':
-            case 'u':
-            case 'w':  // wd:  a, A, u, w
-                if (command)
-                {
-                    int trial_wd = not_a_weekday;
-                    if (*fmt == 'a' || *fmt == 'A')
-                    {
-                        if (modified == CharT{})
-                        {
+    const CharT* command = nullptr;
+    auto modified = CharT{};
+    auto width = -1;
+
+    CONSTDATA int not_a_year = numeric_limits<int>::min();
+    CONSTDATA int not_a_2digit_year = 100;
+    CONSTDATA int not_a_century = not_a_year / 100;
+    CONSTDATA int not_a_month = 0;
+    CONSTDATA int not_a_day = 0;
+    CONSTDATA int not_a_hour = numeric_limits<int>::min();
+    CONSTDATA int not_a_hour_12_value = 0;
+    CONSTDATA int not_a_minute = not_a_hour;
+    CONSTDATA Duration not_a_second = Duration::min();
+    CONSTDATA int not_a_doy = -1;
+    CONSTDATA int not_a_weekday = 8;
+    CONSTDATA int not_a_week_num = 100;
+    CONSTDATA int not_a_ampm = -1;
+    CONSTDATA minutes not_a_offset = minutes::min();
+
+    int Y = not_a_year;                                   // c, F, Y                   *
+    int y = not_a_2digit_year;                            // D, x, y                   *
+    int g = not_a_2digit_year;                            // g                         *
+    int G = not_a_year;                                   // G                         *
+    int C = not_a_century;                                // C                         *
+    int m = not_a_month;                                  // b, B, h, m, c, D, F, x    *
+    int d = not_a_day;                                    // c, d, D, e, F, x          *
+    int j = not_a_doy;                                    // j                         *
+    int wd = not_a_weekday;                               // a, A, u, w                *
+    int H = not_a_hour;                                   // c, H, R, T, X             *
+    int I = not_a_hour_12_value;                          // I, r                      *
+    int p = not_a_ampm;                                   // p, r                      *
+    int M = not_a_minute;                                 // c, M, r, R, T, X          *
+    Duration s = not_a_second;                            // c, r, S, T, X             *
+    int U = not_a_week_num;                               // U                         *
+    int V = not_a_week_num;                               // V                         *
+    int W = not_a_week_num;                               // W                         *
+    std::basic_string<CharT, Traits, Alloc> temp_abbrev;  // Z   *
+    minutes temp_offset = not_a_offset;                   // z                    *
+
+    using detail::checked_set;
+    using detail::read;
+    using detail::rld;
+    using detail::rs;
+    using detail::ru;
+    for (; *fmt && is.rdstate() == std::ios::goodbit; ++fmt) {
+      switch (*fmt) {
+        case 'a':
+        case 'A':
+        case 'u':
+        case 'w':  // wd:  a, A, u, w
+          if (command) {
+            int trial_wd = not_a_weekday;
+            if (*fmt == 'a' || *fmt == 'A') {
+              if (modified == CharT{}) {
 #if !ONLY_C_LOCALE
-                            ios::iostate err = ios::goodbit;
-                            f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                            is.setstate(err);
-                            if (!is.fail())
-                                trial_wd = tm.tm_wday;
+                ios::iostate err = ios::goodbit;
+                f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+                is.setstate(err);
+                if (!is.fail()) trial_wd = tm.tm_wday;
 #else
-                            auto nm = detail::weekday_names();
-                            auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
-                            if (!is.fail())
-                                trial_wd = i % 7;
+                auto nm = detail::weekday_names();
+                auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+                if (!is.fail()) trial_wd = i % 7;
 #endif
-                        }
-                        else
-                            read(is, CharT{'%'}, width, modified, *fmt);
-                    }
-                    else  // *fmt == 'u' || *fmt == 'w'
-                    {
+              } else
+                read(is, CharT{'%'}, width, modified, *fmt);
+            } else  // *fmt == 'u' || *fmt == 'w'
+            {
 #if !ONLY_C_LOCALE
-                        if (modified == CharT{})
+              if (modified == CharT{})
 #else
-                        if (modified != CharT{'E'})
-#endif
-                        {
-                            read(is, ru{trial_wd, 1, width == -1 ?
-                                                      1u : static_cast<unsigned>(width)});
-                            if (!is.fail())
-                            {
-                                if (*fmt == 'u')
-                                {
-                                    if (!(1 <= trial_wd && trial_wd <= 7))
-                                    {
-                                        trial_wd = not_a_weekday;
-                                        is.setstate(ios::failbit);
-                                    }
-                                    else if (trial_wd == 7)
-                                        trial_wd = 0;
-                                }
-                                else  // *fmt == 'w'
-                                {
-                                    if (!(0 <= trial_wd && trial_wd <= 6))
-                                    {
-                                        trial_wd = not_a_weekday;
-                                        is.setstate(ios::failbit);
-                                    }
-                                }
-                            }
-                        }
-#if !ONLY_C_LOCALE
-                        else if (modified == CharT{'O'})
-                        {
-                            ios::iostate err = ios::goodbit;
-                            f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                            is.setstate(err);
-                            if (!is.fail())
-                                trial_wd = tm.tm_wday;
-                        }
+              if (modified != CharT{'E'})
 #endif
-                        else
-                            read(is, CharT{'%'}, width, modified, *fmt);
+              {
+                read(is,
+                     ru{trial_wd, 1, width == -1 ? 1u : static_cast<unsigned>(width)});
+                if (!is.fail()) {
+                  if (*fmt == 'u') {
+                    if (!(1 <= trial_wd && trial_wd <= 7)) {
+                      trial_wd = not_a_weekday;
+                      is.setstate(ios::failbit);
+                    } else if (trial_wd == 7)
+                      trial_wd = 0;
+                  } else  // *fmt == 'w'
+                  {
+                    if (!(0 <= trial_wd && trial_wd <= 6)) {
+                      trial_wd = not_a_weekday;
+                      is.setstate(ios::failbit);
                     }
-                    if (trial_wd != not_a_weekday)
-                        checked_set(wd, trial_wd, not_a_weekday, is);
+                  }
                 }
-                else  // !command
-                    read(is, *fmt);
-                command = nullptr;
-                width = -1;
-                modified = CharT{};
-                break;
-            case 'b':
-            case 'B':
-            case 'h':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int ttm = not_a_month;
+              }
 #if !ONLY_C_LOCALE
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                            ttm = tm.tm_mon + 1;
-                        is.setstate(err);
+              else if (modified == CharT{'O'}) {
+                ios::iostate err = ios::goodbit;
+                f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+                is.setstate(err);
+                if (!is.fail()) trial_wd = tm.tm_wday;
+              }
+#endif
+              else
+                read(is, CharT{'%'}, width, modified, *fmt);
+            }
+            if (trial_wd != not_a_weekday) checked_set(wd, trial_wd, not_a_weekday, is);
+          } else  // !command
+            read(is, *fmt);
+          command = nullptr;
+          width = -1;
+          modified = CharT{};
+          break;
+        case 'b':
+        case 'B':
+        case 'h':
+          if (command) {
+            if (modified == CharT{}) {
+              int ttm = not_a_month;
+#if !ONLY_C_LOCALE
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0) ttm = tm.tm_mon + 1;
+              is.setstate(err);
 #else
-                        auto nm = detail::month_names();
-                        auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
-                        if (!is.fail())
-                            ttm = i % 12 + 1;
+              auto nm = detail::month_names();
+              auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+              if (!is.fail()) ttm = i % 12 + 1;
 #endif
-                        checked_set(m, ttm, not_a_month, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'c':
-                if (command)
-                {
-                    if (modified != CharT{'O'})
-                    {
+              checked_set(m, ttm, not_a_month, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'c':
+          if (command) {
+            if (modified != CharT{'O'}) {
 #if !ONLY_C_LOCALE
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                        {
-                            checked_set(Y, tm.tm_year + 1900, not_a_year, is);
-                            checked_set(m, tm.tm_mon + 1, not_a_month, is);
-                            checked_set(d, tm.tm_mday, not_a_day, is);
-                            checked_set(H, tm.tm_hour, not_a_hour, is);
-                            checked_set(M, tm.tm_min, not_a_minute, is);
-                            checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
-                                        not_a_second, is);
-                        }
-                        is.setstate(err);
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0) {
+                checked_set(Y, tm.tm_year + 1900, not_a_year, is);
+                checked_set(m, tm.tm_mon + 1, not_a_month, is);
+                checked_set(d, tm.tm_mday, not_a_day, is);
+                checked_set(H, tm.tm_hour, not_a_hour, is);
+                checked_set(M, tm.tm_min, not_a_minute, is);
+                checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}), not_a_second,
+                            is);
+              }
+              is.setstate(err);
 #else
-                        // "%a %b %e %T %Y"
-                        auto nm = detail::weekday_names();
-                        auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
-                        checked_set(wd, static_cast<int>(i % 7), not_a_weekday, is);
-                        ws(is);
-                        nm = detail::month_names();
-                        i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
-                        checked_set(m, static_cast<int>(i % 12 + 1), not_a_month, is);
-                        ws(is);
-                        int td = not_a_day;
-                        read(is, rs{td, 1, 2});
-                        checked_set(d, td, not_a_day, is);
-                        ws(is);
-                        using dfs = detail::decimal_format_seconds<Duration>;
-                        CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
-                        int tH;
-                        int tM;
-                        long double S;
-                        read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
-                                               CharT{':'}, rld{S, 1, w});
-                        checked_set(H, tH, not_a_hour, is);
-                        checked_set(M, tM, not_a_minute, is);
-                        checked_set(s, round<Duration>(duration<long double>{S}),
-                                    not_a_second, is);
-                        ws(is);
-                        int tY = not_a_year;
-                        read(is, rs{tY, 1, 4u});
-                        checked_set(Y, tY, not_a_year, is);
+              // "%a %b %e %T %Y"
+              auto nm = detail::weekday_names();
+              auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+              checked_set(wd, static_cast<int>(i % 7), not_a_weekday, is);
+              ws(is);
+              nm = detail::month_names();
+              i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+              checked_set(m, static_cast<int>(i % 12 + 1), not_a_month, is);
+              ws(is);
+              int td = not_a_day;
+              read(is, rs{td, 1, 2});
+              checked_set(d, td, not_a_day, is);
+              ws(is);
+              using dfs = detail::decimal_format_seconds<Duration>;
+              CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+              int tH;
+              int tM;
+              long double S;
+              read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w});
+              checked_set(H, tH, not_a_hour, is);
+              checked_set(M, tM, not_a_minute, is);
+              checked_set(s, round<Duration>(duration<long double>{S}), not_a_second, is);
+              ws(is);
+              int tY = not_a_year;
+              read(is, rs{tY, 1, 4u});
+              checked_set(Y, tY, not_a_year, is);
 #endif
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'x':
-                if (command)
-                {
-                    if (modified != CharT{'O'})
-                    {
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'x':
+          if (command) {
+            if (modified != CharT{'O'}) {
 #if !ONLY_C_LOCALE
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                        {
-                            checked_set(Y, tm.tm_year + 1900, not_a_year, is);
-                            checked_set(m, tm.tm_mon + 1, not_a_month, is);
-                            checked_set(d, tm.tm_mday, not_a_day, is);
-                        }
-                        is.setstate(err);
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0) {
+                checked_set(Y, tm.tm_year + 1900, not_a_year, is);
+                checked_set(m, tm.tm_mon + 1, not_a_month, is);
+                checked_set(d, tm.tm_mday, not_a_day, is);
+              }
+              is.setstate(err);
 #else
-                        // "%m/%d/%y"
-                        int ty = not_a_2digit_year;
-                        int tm = not_a_month;
-                        int td = not_a_day;
-                        read(is, ru{tm, 1, 2}, CharT{'/'}, ru{td, 1, 2}, CharT{'/'},
-                                 rs{ty, 1, 2});
-                        checked_set(y, ty, not_a_2digit_year, is);
-                        checked_set(m, tm, not_a_month, is);
-                        checked_set(d, td, not_a_day, is);
+              // "%m/%d/%y"
+              int ty = not_a_2digit_year;
+              int tm = not_a_month;
+              int td = not_a_day;
+              read(is, ru{tm, 1, 2}, CharT{'/'}, ru{td, 1, 2}, CharT{'/'}, rs{ty, 1, 2});
+              checked_set(y, ty, not_a_2digit_year, is);
+              checked_set(m, tm, not_a_month, is);
+              checked_set(d, td, not_a_day, is);
 #endif
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'X':
-                if (command)
-                {
-                    if (modified != CharT{'O'})
-                    {
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'X':
+          if (command) {
+            if (modified != CharT{'O'}) {
 #if !ONLY_C_LOCALE
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                        {
-                            checked_set(H, tm.tm_hour, not_a_hour, is);
-                            checked_set(M, tm.tm_min, not_a_minute, is);
-                            checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
-                                        not_a_second, is);
-                        }
-                        is.setstate(err);
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0) {
+                checked_set(H, tm.tm_hour, not_a_hour, is);
+                checked_set(M, tm.tm_min, not_a_minute, is);
+                checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}), not_a_second,
+                            is);
+              }
+              is.setstate(err);
 #else
-                        // "%T"
-                        using dfs = detail::decimal_format_seconds<Duration>;
-                        CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
-                        int tH = not_a_hour;
-                        int tM = not_a_minute;
-                        long double S;
-                        read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
-                                               CharT{':'}, rld{S, 1, w});
-                        checked_set(H, tH, not_a_hour, is);
-                        checked_set(M, tM, not_a_minute, is);
-                        checked_set(s, round<Duration>(duration<long double>{S}),
-                                    not_a_second, is);
+              // "%T"
+              using dfs = detail::decimal_format_seconds<Duration>;
+              CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+              int tH = not_a_hour;
+              int tM = not_a_minute;
+              long double S;
+              read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w});
+              checked_set(H, tH, not_a_hour, is);
+              checked_set(M, tM, not_a_minute, is);
+              checked_set(s, round<Duration>(duration<long double>{S}), not_a_second, is);
 #endif
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'C':
-                if (command)
-                {
-                    int tC = not_a_century;
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'C':
+          if (command) {
+            int tC = not_a_century;
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
-                    {
+            if (modified == CharT{}) {
 #endif
-                        read(is, rs{tC, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              read(is, rs{tC, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
 #if !ONLY_C_LOCALE
-                    }
-                    else
-                    {
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                        {
-                            auto tY = tm.tm_year + 1900;
-                            tC = (tY >= 0 ? tY : tY-99) / 100;
-                        }
-                        is.setstate(err);
-                    }
+            } else {
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0) {
+                auto tY = tm.tm_year + 1900;
+                tC = (tY >= 0 ? tY : tY - 99) / 100;
+              }
+              is.setstate(err);
+            }
 #endif
-                    checked_set(C, tC, not_a_century, is);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'D':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tn = not_a_month;
-                        int td = not_a_day;
-                        int ty = not_a_2digit_year;
-                        read(is, ru{tn, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'},
-                                 ru{td, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'},
-                                 rs{ty, 1, 2});
-                        checked_set(y, ty, not_a_2digit_year, is);
-                        checked_set(m, tn, not_a_month, is);
-                        checked_set(d, td, not_a_day, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'F':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tY = not_a_year;
-                        int tn = not_a_month;
-                        int td = not_a_day;
-                        read(is, rs{tY, 1, width == -1 ? 4u : static_cast<unsigned>(width)},
-                                 CharT{'-'}, ru{tn, 1, 2}, CharT{'-'}, ru{td, 1, 2});
-                        checked_set(Y, tY, not_a_year, is);
-                        checked_set(m, tn, not_a_month, is);
-                        checked_set(d, td, not_a_day, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'd':
-            case 'e':
-                if (command)
-                {
+            checked_set(C, tC, not_a_century, is);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'D':
+          if (command) {
+            if (modified == CharT{}) {
+              int tn = not_a_month;
+              int td = not_a_day;
+              int ty = not_a_2digit_year;
+              read(is, ru{tn, 1, 2}, CharT{'\0'}, CharT{'/'}, CharT{'\0'}, ru{td, 1, 2},
+                   CharT{'\0'}, CharT{'/'}, CharT{'\0'}, rs{ty, 1, 2});
+              checked_set(y, ty, not_a_2digit_year, is);
+              checked_set(m, tn, not_a_month, is);
+              checked_set(d, td, not_a_day, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'F':
+          if (command) {
+            if (modified == CharT{}) {
+              int tY = not_a_year;
+              int tn = not_a_month;
+              int td = not_a_day;
+              read(is, rs{tY, 1, width == -1 ? 4u : static_cast<unsigned>(width)},
+                   CharT{'-'}, ru{tn, 1, 2}, CharT{'-'}, ru{td, 1, 2});
+              checked_set(Y, tY, not_a_year, is);
+              checked_set(m, tn, not_a_month, is);
+              checked_set(d, td, not_a_day, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'd':
+        case 'e':
+          if (command) {
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            if (modified == CharT{})
 #else
-                    if (modified != CharT{'E'})
+            if (modified != CharT{'E'})
 #endif
-                    {
-                        int td = not_a_day;
-                        read(is, rs{td, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(d, td, not_a_day, is);
-                    }
+            {
+              int td = not_a_day;
+              read(is, rs{td, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(d, td, not_a_day, is);
+            }
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        command = nullptr;
-                        width = -1;
-                        modified = CharT{};
-                        if ((err & ios::failbit) == 0)
-                            checked_set(d, tm.tm_mday, not_a_day, is);
-                        is.setstate(err);
-                    }
+            else if (modified == CharT{'O'}) {
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              command = nullptr;
+              width = -1;
+              modified = CharT{};
+              if ((err & ios::failbit) == 0) checked_set(d, tm.tm_mday, not_a_day, is);
+              is.setstate(err);
+            }
 #endif
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'H':
-                if (command)
-                {
+            else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'H':
+          if (command) {
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            if (modified == CharT{})
 #else
-                    if (modified != CharT{'E'})
+            if (modified != CharT{'E'})
 #endif
-                    {
-                        int tH = not_a_hour;
-                        read(is, ru{tH, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(H, tH, not_a_hour, is);
-                    }
+            {
+              int tH = not_a_hour;
+              read(is, ru{tH, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(H, tH, not_a_hour, is);
+            }
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                            checked_set(H, tm.tm_hour, not_a_hour, is);
-                        is.setstate(err);
-                    }
+            else if (modified == CharT{'O'}) {
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0) checked_set(H, tm.tm_hour, not_a_hour, is);
+              is.setstate(err);
+            }
 #endif
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'I':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tI = not_a_hour_12_value;
-                        // reads in an hour into I, but most be in [1, 12]
-                        read(is, rs{tI, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        if (!(1 <= tI && tI <= 12))
-                            is.setstate(ios::failbit);
-                        checked_set(I, tI, not_a_hour_12_value, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-               break;
-            case 'j':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tj = not_a_doy;
-                        read(is, ru{tj, 1, width == -1 ? 3u : static_cast<unsigned>(width)});
-                        checked_set(j, tj, not_a_doy, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'M':
-                if (command)
-                {
+            else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'I':
+          if (command) {
+            if (modified == CharT{}) {
+              int tI = not_a_hour_12_value;
+              // reads in an hour into I, but must be in [1, 12]
+              read(is, rs{tI, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              if (!(1 <= tI && tI <= 12)) is.setstate(ios::failbit);
+              checked_set(I, tI, not_a_hour_12_value, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'j':
+          if (command) {
+            if (modified == CharT{}) {
+              int tj = not_a_doy;
+              read(is, ru{tj, 1, width == -1 ? 3u : static_cast<unsigned>(width)});
+              checked_set(j, tj, not_a_doy, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'M':
+          if (command) {
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            if (modified == CharT{})
 #else
-                    if (modified != CharT{'E'})
+            if (modified != CharT{'E'})
 #endif
-                    {
-                        int tM = not_a_minute;
-                        read(is, ru{tM, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(M, tM, not_a_minute, is);
-                    }
+            {
+              int tM = not_a_minute;
+              read(is, ru{tM, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(M, tM, not_a_minute, is);
+            }
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                            checked_set(M, tm.tm_min, not_a_minute, is);
-                        is.setstate(err);
-                    }
+            else if (modified == CharT{'O'}) {
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0) checked_set(M, tm.tm_min, not_a_minute, is);
+              is.setstate(err);
+            }
 #endif
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'm':
-                if (command)
-                {
+            else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'm':
+          if (command) {
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            if (modified == CharT{})
 #else
-                    if (modified != CharT{'E'})
+            if (modified != CharT{'E'})
 #endif
-                    {
-                        int tn = not_a_month;
-                        read(is, rs{tn, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(m, tn, not_a_month, is);
-                    }
+            {
+              int tn = not_a_month;
+              read(is, rs{tn, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(m, tn, not_a_month, is);
+            }
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                            checked_set(m, tm.tm_mon + 1, not_a_month, is);
-                        is.setstate(err);
-                    }
+            else if (modified == CharT{'O'}) {
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0)
+                checked_set(m, tm.tm_mon + 1, not_a_month, is);
+              is.setstate(err);
+            }
 #endif
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'n':
-            case 't':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        // %n matches a single white space character
-                        // %t matches 0 or 1 white space characters
-                        auto ic = is.peek();
-                        if (Traits::eq_int_type(ic, Traits::eof()))
-                        {
-                            ios::iostate err = ios::eofbit;
-                            if (*fmt == 'n')
-                                err |= ios::failbit;
-                            is.setstate(err);
-                            break;
-                        }
-                        if (isspace(ic))
-                        {
-                            (void)is.get();
-                        }
-                        else if (*fmt == 'n')
-                            is.setstate(ios::failbit);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
+            else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'n':
+        case 't':
+          if (command) {
+            if (modified == CharT{}) {
+              // %n matches a single white space character
+              // %t matches 0 or 1 white space characters
+              auto ic = is.peek();
+              if (Traits::eq_int_type(ic, Traits::eof())) {
+                ios::iostate err = ios::eofbit;
+                if (*fmt == 'n') err |= ios::failbit;
+                is.setstate(err);
                 break;
-            case 'p':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tp = not_a_ampm;
+              }
+              if (isspace(ic)) {
+                (void)is.get();
+              } else if (*fmt == 'n')
+                is.setstate(ios::failbit);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'p':
+          if (command) {
+            if (modified == CharT{}) {
+              int tp = not_a_ampm;
 #if !ONLY_C_LOCALE
-                        tm = std::tm{};
-                        tm.tm_hour = 1;
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        is.setstate(err);
-                        if (tm.tm_hour == 1)
-                            tp = 0;
-                        else if (tm.tm_hour == 13)
-                            tp = 1;
-                        else
-                            is.setstate(err);
+              tm = std::tm{};
+              tm.tm_hour = 1;
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              is.setstate(err);
+              if (tm.tm_hour == 1)
+                tp = 0;
+              else if (tm.tm_hour == 13)
+                tp = 1;
+              else
+                is.setstate(err);
 #else
-                        auto nm = detail::ampm_names();
-                        auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
-                        tp = i;
+              auto nm = detail::ampm_names();
+              auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+              tp = i;
 #endif
-                        checked_set(p, tp, not_a_ampm, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-
-               break;
-            case 'r':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
+              checked_set(p, tp, not_a_ampm, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+
+          break;
+        case 'r':
+          if (command) {
+            if (modified == CharT{}) {
 #if !ONLY_C_LOCALE
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                        {
-                            checked_set(H, tm.tm_hour, not_a_hour, is);
-                            checked_set(M, tm.tm_min, not_a_hour, is);
-                            checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
-                                        not_a_second, is);
-                        }
-                        is.setstate(err);
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0) {
+                checked_set(H, tm.tm_hour, not_a_hour, is);
+                checked_set(M, tm.tm_min, not_a_hour, is);
+                checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}), not_a_second,
+                            is);
+              }
+              is.setstate(err);
 #else
-                        // "%I:%M:%S %p"
-                        using dfs = detail::decimal_format_seconds<Duration>;
-                        CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
-                        long double S;
-                        int tI = not_a_hour_12_value;
-                        int tM = not_a_minute;
-                        read(is, ru{tI, 1, 2}, CharT{':'}, ru{tM, 1, 2},
-                                               CharT{':'}, rld{S, 1, w});
-                        checked_set(I, tI, not_a_hour_12_value, is);
-                        checked_set(M, tM, not_a_minute, is);
-                        checked_set(s, round<Duration>(duration<long double>{S}),
-                                    not_a_second, is);
-                        ws(is);
-                        auto nm = detail::ampm_names();
-                        auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
-                        checked_set(p, static_cast<int>(i), not_a_ampm, is);
+              // "%I:%M:%S %p"
+              using dfs = detail::decimal_format_seconds<Duration>;
+              CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+              long double S;
+              int tI = not_a_hour_12_value;
+              int tM = not_a_minute;
+              read(is, ru{tI, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w});
+              checked_set(I, tI, not_a_hour_12_value, is);
+              checked_set(M, tM, not_a_minute, is);
+              checked_set(s, round<Duration>(duration<long double>{S}), not_a_second, is);
+              ws(is);
+              auto nm = detail::ampm_names();
+              auto i = detail::scan_keyword(is, nm.first, nm.second) - nm.first;
+              checked_set(p, static_cast<int>(i), not_a_ampm, is);
 #endif
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'R':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tH = not_a_hour;
-                        int tM = not_a_minute;
-                        read(is, ru{tH, 1, 2}, CharT{'\0'}, CharT{':'}, CharT{'\0'},
-                                 ru{tM, 1, 2}, CharT{'\0'});
-                        checked_set(H, tH, not_a_hour, is);
-                        checked_set(M, tM, not_a_minute, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'S':
-                if (command)
-                {
- #if !ONLY_C_LOCALE
-                   if (modified == CharT{})
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'R':
+          if (command) {
+            if (modified == CharT{}) {
+              int tH = not_a_hour;
+              int tM = not_a_minute;
+              read(is, ru{tH, 1, 2}, CharT{'\0'}, CharT{':'}, CharT{'\0'}, ru{tM, 1, 2},
+                   CharT{'\0'});
+              checked_set(H, tH, not_a_hour, is);
+              checked_set(M, tM, not_a_minute, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'S':
+          if (command) {
+#if !ONLY_C_LOCALE
+            if (modified == CharT{})
 #else
-                   if (modified != CharT{'E'})
+            if (modified != CharT{'E'})
 #endif
-                    {
-                        using dfs = detail::decimal_format_seconds<Duration>;
-                        CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
-                        long double S;
-                        read(is, rld{S, 1, width == -1 ? w : static_cast<unsigned>(width)});
-                        checked_set(s, round<Duration>(duration<long double>{S}),
-                                    not_a_second, is);
-                    }
+            {
+              using dfs = detail::decimal_format_seconds<Duration>;
+              CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+              long double S;
+              read(is, rld{S, 1, width == -1 ? w : static_cast<unsigned>(width)});
+              checked_set(s, round<Duration>(duration<long double>{S}), not_a_second, is);
+            }
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'O'})
-                    {
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                            checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}),
-                                        not_a_second, is);
-                        is.setstate(err);
-                    }
+            else if (modified == CharT{'O'}) {
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0)
+                checked_set(s, duration_cast<Duration>(seconds{tm.tm_sec}), not_a_second,
+                            is);
+              is.setstate(err);
+            }
 #endif
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'T':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        using dfs = detail::decimal_format_seconds<Duration>;
-                        CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
-                        int tH = not_a_hour;
-                        int tM = not_a_minute;
-                        long double S;
-                        read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2},
-                                               CharT{':'}, rld{S, 1, w});
-                        checked_set(H, tH, not_a_hour, is);
-                        checked_set(M, tM, not_a_minute, is);
-                        checked_set(s, round<Duration>(duration<long double>{S}),
-                                    not_a_second, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'Y':
-                if (command)
-                {
+            else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'T':
+          if (command) {
+            if (modified == CharT{}) {
+              using dfs = detail::decimal_format_seconds<Duration>;
+              CONSTDATA auto w = Duration::period::den == 1 ? 2 : 3 + dfs::width;
+              int tH = not_a_hour;
+              int tM = not_a_minute;
+              long double S;
+              read(is, ru{tH, 1, 2}, CharT{':'}, ru{tM, 1, 2}, CharT{':'}, rld{S, 1, w});
+              checked_set(H, tH, not_a_hour, is);
+              checked_set(M, tM, not_a_minute, is);
+              checked_set(s, round<Duration>(duration<long double>{S}), not_a_second, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'Y':
+          if (command) {
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            if (modified == CharT{})
 #else
-                    if (modified != CharT{'O'})
+            if (modified != CharT{'O'})
 #endif
-                    {
-                        int tY = not_a_year;
-                        read(is, rs{tY, 1, width == -1 ? 4u : static_cast<unsigned>(width)});
-                        checked_set(Y, tY, not_a_year, is);
-                    }
+            {
+              int tY = not_a_year;
+              read(is, rs{tY, 1, width == -1 ? 4u : static_cast<unsigned>(width)});
+              checked_set(Y, tY, not_a_year, is);
+            }
 #if !ONLY_C_LOCALE
-                    else if (modified == CharT{'E'})
-                    {
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                            checked_set(Y, tm.tm_year + 1900, not_a_year, is);
-                        is.setstate(err);
-                    }
+            else if (modified == CharT{'E'}) {
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0)
+                checked_set(Y, tm.tm_year + 1900, not_a_year, is);
+              is.setstate(err);
+            }
 #endif
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'y':
-                if (command)
-                {
+            else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'y':
+          if (command) {
 #if !ONLY_C_LOCALE
-                    if (modified == CharT{})
+            if (modified == CharT{})
 #endif
-                    {
-                        int ty = not_a_2digit_year;
-                        read(is, ru{ty, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(y, ty, not_a_2digit_year, is);
-                    }
+            {
+              int ty = not_a_2digit_year;
+              read(is, ru{ty, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(y, ty, not_a_2digit_year, is);
+            }
 #if !ONLY_C_LOCALE
-                    else
-                    {
-                        ios::iostate err = ios::goodbit;
-                        f.get(is, nullptr, is, err, &tm, command, fmt+1);
-                        if ((err & ios::failbit) == 0)
-                            checked_set(Y, tm.tm_year + 1900, not_a_year, is);
-                        is.setstate(err);
-                    }
+            else {
+              ios::iostate err = ios::goodbit;
+              f.get(is, nullptr, is, err, &tm, command, fmt + 1);
+              if ((err & ios::failbit) == 0)
+                checked_set(Y, tm.tm_year + 1900, not_a_year, is);
+              is.setstate(err);
+            }
 #endif
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'g':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tg = not_a_2digit_year;
-                        read(is, ru{tg, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(g, tg, not_a_2digit_year, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'G':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tG = not_a_year;
-                        read(is, rs{tG, 1, width == -1 ? 4u : static_cast<unsigned>(width)});
-                        checked_set(G, tG, not_a_year, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'U':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tU = not_a_week_num;
-                        read(is, ru{tU, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(U, tU, not_a_week_num, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'V':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tV = not_a_week_num;
-                        read(is, ru{tV, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(V, tV, not_a_week_num, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'W':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        int tW = not_a_week_num;
-                        read(is, ru{tW, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
-                        checked_set(W, tW, not_a_week_num, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'E':
-            case 'O':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        modified = *fmt;
-                    }
-                    else
-                    {
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                        command = nullptr;
-                        width = -1;
-                        modified = CharT{};
-                    }
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case '%':
-                if (command)
-                {
-                    if (modified == CharT{})
-                        read(is, *fmt);
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    command = fmt;
-                break;
-            case 'z':
-                if (command)
-                {
-                    int tH, tM;
-                    minutes toff = not_a_offset;
-                    bool neg = false;
-                    auto ic = is.peek();
-                    if (!Traits::eq_int_type(ic, Traits::eof()))
-                    {
-                        auto c = static_cast<char>(Traits::to_char_type(ic));
-                        if (c == '-')
-                            neg = true;
-                    }
-                    if (modified == CharT{})
-                    {
-                        read(is, rs{tH, 2, 2});
-                        if (!is.fail())
-                            toff = hours{std::abs(tH)};
-                        if (is.good())
-                        {
-                            ic = is.peek();
-                            if (!Traits::eq_int_type(ic, Traits::eof()))
-                            {
-                                auto c = static_cast<char>(Traits::to_char_type(ic));
-                                if ('0' <= c && c <= '9')
-                                {
-                                    read(is, ru{tM, 2, 2});
-                                    if (!is.fail())
-                                        toff += minutes{tM};
-                                }
-                            }
-                        }
-                    }
-                    else
-                    {
-                        read(is, rs{tH, 1, 2});
-                        if (!is.fail())
-                            toff = hours{std::abs(tH)};
-                        if (is.good())
-                        {
-                            ic = is.peek();
-                            if (!Traits::eq_int_type(ic, Traits::eof()))
-                            {
-                                auto c = static_cast<char>(Traits::to_char_type(ic));
-                                if (c == ':')
-                                {
-                                    (void)is.get();
-                                    read(is, ru{tM, 2, 2});
-                                    if (!is.fail())
-                                        toff += minutes{tM};
-                                }
-                            }
-                        }
-                    }
-                    if (neg)
-                        toff = -toff;
-                    checked_set(temp_offset, toff, not_a_offset, is);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            case 'Z':
-                if (command)
-                {
-                    if (modified == CharT{})
-                    {
-                        std::basic_string<CharT, Traits, Alloc> buf;
-                        while (is.rdstate() == std::ios::goodbit)
-                        {
-                            auto i = is.rdbuf()->sgetc();
-                            if (Traits::eq_int_type(i, Traits::eof()))
-                            {
-                                is.setstate(ios::eofbit);
-                                break;
-                            }
-                            auto wc = Traits::to_char_type(i);
-                            auto c = static_cast<char>(wc);
-                            // is c a valid time zone name or abbreviation character?
-                            if (!(CharT{1} < wc && wc < CharT{127}) || !(isalnum(c) ||
-                                    c == '_' || c == '/' || c == '-' || c == '+'))
-                                break;
-                            buf.push_back(c);
-                            is.rdbuf()->sbumpc();
-                        }
-                        if (buf.empty())
-                            is.setstate(ios::failbit);
-                        checked_set(temp_abbrev, buf, {}, is);
-                    }
-                    else
-                        read(is, CharT{'%'}, width, modified, *fmt);
-                    command = nullptr;
-                    width = -1;
-                    modified = CharT{};
-                }
-                else
-                    read(is, *fmt);
-                break;
-            default:
-                if (command)
-                {
-                    if (width == -1 && modified == CharT{} && '0' <= *fmt && *fmt <= '9')
-                    {
-                        width = static_cast<char>(*fmt) - '0';
-                        while ('0' <= fmt[1] && fmt[1] <= '9')
-                            width = 10*width + static_cast<char>(*++fmt) - '0';
-                    }
-                    else
-                    {
-                        if (modified == CharT{})
-                            read(is, CharT{'%'}, width, *fmt);
-                        else
-                            read(is, CharT{'%'}, width, modified, *fmt);
-                        command = nullptr;
-                        width = -1;
-                        modified = CharT{};
-                    }
-                }
-                else  // !command
-                {
-                    if (isspace(static_cast<unsigned char>(*fmt)))
-                        ws(is); // space matches 0 or more white space characters
-                    else
-                        read(is, *fmt);
-                }
-                break;
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'g':
+          if (command) {
+            if (modified == CharT{}) {
+              int tg = not_a_2digit_year;
+              read(is, ru{tg, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(g, tg, not_a_2digit_year, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'G':
+          if (command) {
+            if (modified == CharT{}) {
+              int tG = not_a_year;
+              read(is, rs{tG, 1, width == -1 ? 4u : static_cast<unsigned>(width)});
+              checked_set(G, tG, not_a_year, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'U':
+          if (command) {
+            if (modified == CharT{}) {
+              int tU = not_a_week_num;
+              read(is, ru{tU, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(U, tU, not_a_week_num, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'V':
+          if (command) {
+            if (modified == CharT{}) {
+              int tV = not_a_week_num;
+              read(is, ru{tV, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(V, tV, not_a_week_num, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'W':
+          if (command) {
+            if (modified == CharT{}) {
+              int tW = not_a_week_num;
+              read(is, ru{tW, 1, width == -1 ? 2u : static_cast<unsigned>(width)});
+              checked_set(W, tW, not_a_week_num, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'E':
+        case 'O':
+          if (command) {
+            if (modified == CharT{}) {
+              modified = *fmt;
+            } else {
+              read(is, CharT{'%'}, width, modified, *fmt);
+              command = nullptr;
+              width = -1;
+              modified = CharT{};
             }
-        }
-        // is.rdstate() != ios::goodbit || *fmt == CharT{}
-        if (is.rdstate() == ios::goodbit && command)
-        {
+          } else
+            read(is, *fmt);
+          break;
+        case '%':
+          if (command) {
             if (modified == CharT{})
-                read(is, CharT{'%'}, width);
+              read(is, *fmt);
             else
-                read(is, CharT{'%'}, width, modified);
-        }
-        if (is.rdstate() != ios::goodbit && *fmt != CharT{} && !is.fail())
-            is.setstate(ios::failbit);
-        if (!is.fail())
-        {
-            if (y != not_a_2digit_year)
-            {
-                // Convert y and an optional C to Y
-                if (!(0 <= y && y <= 99))
-                    goto broken;
-                if (C == not_a_century)
-                {
-                    if (Y == not_a_year)
-                    {
-                        if (y >= 69)
-                            C = 19;
-                        else
-                            C = 20;
-                    }
-                    else
-                    {
-                        C = (Y >= 0 ? Y : Y-100) / 100;
-                    }
-                }
-                int tY;
-                if (C >= 0)
-                    tY = 100*C + y;
-                else
-                    tY = 100*(C+1) - (y == 0 ? 100 : y);
-                if (Y != not_a_year && Y != tY)
-                    goto broken;
-                Y = tY;
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            command = fmt;
+          break;
+        case 'z':
+          if (command) {
+            int tH, tM;
+            minutes toff = not_a_offset;
+            bool neg = false;
+            auto ic = is.peek();
+            if (!Traits::eq_int_type(ic, Traits::eof())) {
+              auto c = static_cast<char>(Traits::to_char_type(ic));
+              if (c == '-') neg = true;
             }
-            if (g != not_a_2digit_year)
-            {
-                // Convert g and an optional C to G
-                if (!(0 <= g && g <= 99))
-                    goto broken;
-                if (C == not_a_century)
-                {
-                    if (G == not_a_year)
-                    {
-                        if (g >= 69)
-                            C = 19;
-                        else
-                            C = 20;
-                    }
-                    else
-                    {
-                        C = (G >= 0 ? G : G-100) / 100;
-                    }
+            if (modified == CharT{}) {
+              read(is, rs{tH, 2, 2});
+              if (!is.fail()) toff = hours{std::abs(tH)};
+              if (is.good()) {
+                ic = is.peek();
+                if (!Traits::eq_int_type(ic, Traits::eof())) {
+                  auto c = static_cast<char>(Traits::to_char_type(ic));
+                  if ('0' <= c && c <= '9') {
+                    read(is, ru{tM, 2, 2});
+                    if (!is.fail()) toff += minutes{tM};
+                  }
                 }
-                int tG;
-                if (C >= 0)
-                    tG = 100*C + g;
-                else
-                    tG = 100*(C+1) - (g == 0 ? 100 : g);
-                if (G != not_a_year && G != tG)
-                    goto broken;
-                G = tG;
-            }
-            if (Y < static_cast<int>(year::min()) || Y > static_cast<int>(year::max()))
-                Y = not_a_year;
-            bool computed = false;
-            if (G != not_a_year && V != not_a_week_num && wd != not_a_weekday)
-            {
-                year_month_day ymd_trial = sys_days(year{G-1}/December/Thursday[last]) +
-                                           (Monday-Thursday) + weeks{V-1} +
-                                           (weekday{static_cast<unsigned>(wd)}-Monday);
-                if (Y == not_a_year)
-                    Y = static_cast<int>(ymd_trial.year());
-                else if (year{Y} != ymd_trial.year())
-                    goto broken;
-                if (m == not_a_month)
-                    m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
-                else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
-                    goto broken;
-                if (d == not_a_day)
-                    d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
-                else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
-                    goto broken;
-                computed = true;
-            }
-            if (Y != not_a_year && U != not_a_week_num && wd != not_a_weekday)
-            {
-                year_month_day ymd_trial = sys_days(year{Y}/January/Sunday[1]) +
-                                           weeks{U-1} +
-                                           (weekday{static_cast<unsigned>(wd)} - Sunday);
-                if (Y == not_a_year)
-                    Y = static_cast<int>(ymd_trial.year());
-                else if (year{Y} != ymd_trial.year())
-                    goto broken;
-                if (m == not_a_month)
-                    m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
-                else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
-                    goto broken;
-                if (d == not_a_day)
-                    d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
-                else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
-                    goto broken;
-                computed = true;
-            }
-            if (Y != not_a_year && W != not_a_week_num && wd != not_a_weekday)
-            {
-                year_month_day ymd_trial = sys_days(year{Y}/January/Monday[1]) +
-                                           weeks{W-1} +
-                                           (weekday{static_cast<unsigned>(wd)} - Monday);
-                if (Y == not_a_year)
-                    Y = static_cast<int>(ymd_trial.year());
-                else if (year{Y} != ymd_trial.year())
-                    goto broken;
-                if (m == not_a_month)
-                    m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
-                else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
-                    goto broken;
-                if (d == not_a_day)
-                    d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
-                else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
-                    goto broken;
-                computed = true;
-            }
-            if (j != not_a_doy && Y != not_a_year)
-            {
-                auto ymd_trial = year_month_day{local_days(year{Y}/1/1) + days{j-1}};
-                if (m == 0)
-                    m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
-                else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
-                    goto broken;
-                if (d == 0)
-                    d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
-                else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
-                    goto broken;
-                j = not_a_doy;
-            }
-            auto ymd = year{Y}/m/d;
-            if (ymd.ok())
-            {
-                if (wd == not_a_weekday)
-                    wd = static_cast<int>((weekday(sys_days(ymd)) - Sunday).count());
-                else if (wd != static_cast<int>((weekday(sys_days(ymd)) - Sunday).count()))
-                    goto broken;
-                if (!computed)
-                {
-                    if (G != not_a_year || V != not_a_week_num)
-                    {
-                        sys_days sd = ymd;
-                        auto G_trial = year_month_day{sd + days{3}}.year();
-                        auto start = sys_days((G_trial - years{1})/December/Thursday[last]) +
-                                     (Monday - Thursday);
-                        if (sd < start)
-                        {
-                            --G_trial;
-                            if (V != not_a_week_num)
-                                start = sys_days((G_trial - years{1})/December/Thursday[last])
-                                        + (Monday - Thursday);
-                        }
-                        if (G != not_a_year && G != static_cast<int>(G_trial))
-                            goto broken;
-                        if (V != not_a_week_num)
-                        {
-                            auto V_trial = duration_cast<weeks>(sd - start).count() + 1;
-                            if (V != V_trial)
-                                goto broken;
-                        }
-                    }
-                    if (U != not_a_week_num)
-                    {
-                        auto start = sys_days(Sunday[1]/January/ymd.year());
-                        auto U_trial = floor<weeks>(sys_days(ymd) - start).count() + 1;
-                        if (U != U_trial)
-                            goto broken;
-                    }
-                    if (W != not_a_week_num)
-                    {
-                        auto start = sys_days(Monday[1]/January/ymd.year());
-                        auto W_trial = floor<weeks>(sys_days(ymd) - start).count() + 1;
-                        if (W != W_trial)
-                            goto broken;
-                    }
+              }
+            } else {
+              read(is, rs{tH, 1, 2});
+              if (!is.fail()) toff = hours{std::abs(tH)};
+              if (is.good()) {
+                ic = is.peek();
+                if (!Traits::eq_int_type(ic, Traits::eof())) {
+                  auto c = static_cast<char>(Traits::to_char_type(ic));
+                  if (c == ':') {
+                    (void)is.get();
+                    read(is, ru{tM, 2, 2});
+                    if (!is.fail()) toff += minutes{tM};
+                  }
                 }
+              }
             }
-            fds.ymd = ymd;
-            if (I != not_a_hour_12_value)
-            {
-                if (!(1 <= I && I <= 12))
-                    goto broken;
-                if (p != not_a_ampm)
-                {
-                    // p is in [0, 1] == [AM, PM]
-                    // Store trial H in I
-                    if (I == 12)
-                        --p;
-                    I += p*12;
-                    // Either set H from I or make sure H and I are consistent
-                    if (H == not_a_hour)
-                        H = I;
-                    else if (I != H)
-                        goto broken;
-                }
-                else  // p == not_a_ampm
-                {
-                    // if H, make sure H and I could be consistent
-                    if (H != not_a_hour)
-                    {
-                        if (I == 12)
-                        {
-                            if (H != 0 && H != 12)
-                                goto broken;
-                        }
-                        else if (!(I == H || I == H+12))
-                        {
-                            goto broken;
-                        }
-                    }
+            if (neg) toff = -toff;
+            checked_set(temp_offset, toff, not_a_offset, is);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        case 'Z':
+          if (command) {
+            if (modified == CharT{}) {
+              std::basic_string<CharT, Traits, Alloc> buf;
+              while (is.rdstate() == std::ios::goodbit) {
+                auto i = is.rdbuf()->sgetc();
+                if (Traits::eq_int_type(i, Traits::eof())) {
+                  is.setstate(ios::eofbit);
+                  break;
                 }
+                auto wc = Traits::to_char_type(i);
+                auto c = static_cast<char>(wc);
+                // is c a valid time zone name or abbreviation character?
+                if (!(CharT{1} < wc && wc < CharT{127}) ||
+                    !(isalnum(c) || c == '_' || c == '/' || c == '-' || c == '+'))
+                  break;
+                buf.push_back(c);
+                is.rdbuf()->sbumpc();
+              }
+              if (buf.empty()) is.setstate(ios::failbit);
+              checked_set(temp_abbrev, buf, {}, is);
+            } else
+              read(is, CharT{'%'}, width, modified, *fmt);
+            command = nullptr;
+            width = -1;
+            modified = CharT{};
+          } else
+            read(is, *fmt);
+          break;
+        default:
+          if (command) {
+            if (width == -1 && modified == CharT{} && '0' <= *fmt && *fmt <= '9') {
+              width = static_cast<char>(*fmt) - '0';
+              while ('0' <= fmt[1] && fmt[1] <= '9')
+                width = 10 * width + static_cast<char>(*++fmt) - '0';
+            } else {
+              if (modified == CharT{})
+                read(is, CharT{'%'}, width, *fmt);
+              else
+                read(is, CharT{'%'}, width, modified, *fmt);
+              command = nullptr;
+              width = -1;
+              modified = CharT{};
             }
-            if (H != not_a_hour)
-            {
-                fds.has_tod = true;
-                fds.tod = hh_mm_ss<Duration>{hours{H}};
-            }
-            if (M != not_a_minute)
-            {
-                fds.has_tod = true;
-                fds.tod.m_ = minutes{M};
+          } else  // !command
+          {
+            if (isspace(static_cast<unsigned char>(*fmt)))
+              ws(is);  // space matches 0 or more white space characters
+            else
+              read(is, *fmt);
+          }
+          break;
+      }
+    }
+    // is.rdstate() != ios::goodbit || *fmt == CharT{}
+    if (is.rdstate() == ios::goodbit && command) {
+      if (modified == CharT{})
+        read(is, CharT{'%'}, width);
+      else
+        read(is, CharT{'%'}, width, modified);
+    }
+    if (is.rdstate() != ios::goodbit && *fmt != CharT{} && !is.fail())
+      is.setstate(ios::failbit);
+    if (!is.fail()) {
+      if (y != not_a_2digit_year) {
+        // Convert y and an optional C to Y
+        if (!(0 <= y && y <= 99)) goto broken;
+        if (C == not_a_century) {
+          if (Y == not_a_year) {
+            if (y >= 69)
+              C = 19;
+            else
+              C = 20;
+          } else {
+            C = (Y >= 0 ? Y : Y - 100) / 100;
+          }
+        }
+        int tY;
+        if (C >= 0)
+          tY = 100 * C + y;
+        else
+          tY = 100 * (C + 1) - (y == 0 ? 100 : y);
+        if (Y != not_a_year && Y != tY) goto broken;
+        Y = tY;
+      }
+      if (g != not_a_2digit_year) {
+        // Convert g and an optional C to G
+        if (!(0 <= g && g <= 99)) goto broken;
+        if (C == not_a_century) {
+          if (G == not_a_year) {
+            if (g >= 69)
+              C = 19;
+            else
+              C = 20;
+          } else {
+            C = (G >= 0 ? G : G - 100) / 100;
+          }
+        }
+        int tG;
+        if (C >= 0)
+          tG = 100 * C + g;
+        else
+          tG = 100 * (C + 1) - (g == 0 ? 100 : g);
+        if (G != not_a_year && G != tG) goto broken;
+        G = tG;
+      }
+      if (Y < static_cast<int>(year::min()) || Y > static_cast<int>(year::max()))
+        Y = not_a_year;
+      bool computed = false;
+      if (G != not_a_year && V != not_a_week_num && wd != not_a_weekday) {
+        year_month_day ymd_trial = sys_days(year{G - 1} / December / Thursday[last]) +
+                                   (Monday - Thursday) + weeks{V - 1} +
+                                   (weekday{static_cast<unsigned>(wd)} - Monday);
+        if (Y == not_a_year)
+          Y = static_cast<int>(ymd_trial.year());
+        else if (year{Y} != ymd_trial.year())
+          goto broken;
+        if (m == not_a_month)
+          m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
+        else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
+          goto broken;
+        if (d == not_a_day)
+          d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
+        else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
+          goto broken;
+        computed = true;
+      }
+      if (Y != not_a_year && U != not_a_week_num && wd != not_a_weekday) {
+        year_month_day ymd_trial = sys_days(year{Y} / January / Sunday[1]) +
+                                   weeks{U - 1} +
+                                   (weekday{static_cast<unsigned>(wd)} - Sunday);
+        if (Y == not_a_year)
+          Y = static_cast<int>(ymd_trial.year());
+        else if (year{Y} != ymd_trial.year())
+          goto broken;
+        if (m == not_a_month)
+          m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
+        else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
+          goto broken;
+        if (d == not_a_day)
+          d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
+        else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
+          goto broken;
+        computed = true;
+      }
+      if (Y != not_a_year && W != not_a_week_num && wd != not_a_weekday) {
+        year_month_day ymd_trial = sys_days(year{Y} / January / Monday[1]) +
+                                   weeks{W - 1} +
+                                   (weekday{static_cast<unsigned>(wd)} - Monday);
+        if (Y == not_a_year)
+          Y = static_cast<int>(ymd_trial.year());
+        else if (year{Y} != ymd_trial.year())
+          goto broken;
+        if (m == not_a_month)
+          m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
+        else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
+          goto broken;
+        if (d == not_a_day)
+          d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
+        else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
+          goto broken;
+        computed = true;
+      }
+      if (j != not_a_doy && Y != not_a_year) {
+        auto ymd_trial = year_month_day{local_days(year{Y} / 1 / 1) + days{j - 1}};
+        if (m == 0)
+          m = static_cast<int>(static_cast<unsigned>(ymd_trial.month()));
+        else if (month(static_cast<unsigned>(m)) != ymd_trial.month())
+          goto broken;
+        if (d == 0)
+          d = static_cast<int>(static_cast<unsigned>(ymd_trial.day()));
+        else if (day(static_cast<unsigned>(d)) != ymd_trial.day())
+          goto broken;
+        j = not_a_doy;
+      }
+      auto ymd = year{Y} / m / d;
+      if (ymd.ok()) {
+        if (wd == not_a_weekday)
+          wd = static_cast<int>((weekday(sys_days(ymd)) - Sunday).count());
+        else if (wd != static_cast<int>((weekday(sys_days(ymd)) - Sunday).count()))
+          goto broken;
+        if (!computed) {
+          if (G != not_a_year || V != not_a_week_num) {
+            sys_days sd = ymd;
+            auto G_trial = year_month_day{sd + days{3}}.year();
+            auto start = sys_days((G_trial - years{1}) / December / Thursday[last]) +
+                         (Monday - Thursday);
+            if (sd < start) {
+              --G_trial;
+              if (V != not_a_week_num)
+                start = sys_days((G_trial - years{1}) / December / Thursday[last]) +
+                        (Monday - Thursday);
             }
-            if (s != not_a_second)
-            {
-                fds.has_tod = true;
-                fds.tod.s_ = detail::decimal_format_seconds<Duration>{s};
+            if (G != not_a_year && G != static_cast<int>(G_trial)) goto broken;
+            if (V != not_a_week_num) {
+              auto V_trial = duration_cast<weeks>(sd - start).count() + 1;
+              if (V != V_trial) goto broken;
             }
-            if (j != not_a_doy)
-            {
-                fds.has_tod = true;
-                fds.tod.h_ += hours{days{j}};
+          }
+          if (U != not_a_week_num) {
+            auto start = sys_days(Sunday[1] / January / ymd.year());
+            auto U_trial = floor<weeks>(sys_days(ymd) - start).count() + 1;
+            if (U != U_trial) goto broken;
+          }
+          if (W != not_a_week_num) {
+            auto start = sys_days(Monday[1] / January / ymd.year());
+            auto W_trial = floor<weeks>(sys_days(ymd) - start).count() + 1;
+            if (W != W_trial) goto broken;
+          }
+        }
+      }
+      fds.ymd = ymd;
+      if (I != not_a_hour_12_value) {
+        if (!(1 <= I && I <= 12)) goto broken;
+        if (p != not_a_ampm) {
+          // p is in [0, 1] == [AM, PM]
+          // Store trial H in I
+          if (I == 12) --p;
+          I += p * 12;
+          // Either set H from I or make sure H and I are consistent
+          if (H == not_a_hour)
+            H = I;
+          else if (I != H)
+            goto broken;
+        } else  // p == not_a_ampm
+        {
+          // if H, make sure H and I could be consistent
+          if (H != not_a_hour) {
+            if (I == 12) {
+              if (H != 0 && H != 12) goto broken;
+            } else if (!(I == H || I == H + 12)) {
+              goto broken;
             }
-            if (wd != not_a_weekday)
-                fds.wd = weekday{static_cast<unsigned>(wd)};
-            if (abbrev != nullptr)
-                *abbrev = std::move(temp_abbrev);
-            if (offset != nullptr && temp_offset != not_a_offset)
-              *offset = temp_offset;
+          }
         }
-       return is;
+      }
+      if (H != not_a_hour) {
+        fds.has_tod = true;
+        fds.tod = hh_mm_ss<Duration>{hours{H}};
+      }
+      if (M != not_a_minute) {
+        fds.has_tod = true;
+        fds.tod.m_ = minutes{M};
+      }
+      if (s != not_a_second) {
+        fds.has_tod = true;
+        fds.tod.s_ = detail::decimal_format_seconds<Duration>{s};
+      }
+      if (j != not_a_doy) {
+        fds.has_tod = true;
+        fds.tod.h_ += hours{days{j}};
+      }
+      if (wd != not_a_weekday) fds.wd = weekday{static_cast<unsigned>(wd)};
+      if (abbrev != nullptr) *abbrev = std::move(temp_abbrev);
+      if (offset != nullptr && temp_offset != not_a_offset) *offset = temp_offset;
     }
-broken:
-    is.setstate(ios::failbit);
     return is;
+  }
+broken:
+  is.setstate(ios::failbit);
+  return is;
 }
 
 template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, year& y,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{};
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.ymd.year().ok())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        y = fds.ymd.year();
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, year& y,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = std::chrono::seconds;
+  fields<CT> fds{};
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.ymd.year().ok()) is.setstate(std::ios::failbit);
+  if (!is.fail()) y = fds.ymd.year();
+  return is;
 }
 
 template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, month& m,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{};
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.ymd.month().ok())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        m = fds.ymd.month();
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, month& m,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = std::chrono::seconds;
+  fields<CT> fds{};
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.ymd.month().ok()) is.setstate(std::ios::failbit);
+  if (!is.fail()) m = fds.ymd.month();
+  return is;
 }
 
 template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, day& d,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{};
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.ymd.day().ok())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        d = fds.ymd.day();
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, day& d,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = std::chrono::seconds;
+  fields<CT> fds{};
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.ymd.day().ok()) is.setstate(std::ios::failbit);
+  if (!is.fail()) d = fds.ymd.day();
+  return is;
 }
 
 template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, weekday& wd,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{};
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.wd.ok())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        wd = fds.wd;
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, weekday& wd,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = std::chrono::seconds;
+  fields<CT> fds{};
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.wd.ok()) is.setstate(std::ios::failbit);
+  if (!is.fail()) wd = fds.wd;
+  return is;
 }
 
 template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, year_month& ym,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{};
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.ymd.month().ok())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        ym = fds.ymd.year()/fds.ymd.month();
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, year_month& ym,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = std::chrono::seconds;
+  fields<CT> fds{};
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.ymd.month().ok()) is.setstate(std::ios::failbit);
+  if (!is.fail()) ym = fds.ymd.year() / fds.ymd.month();
+  return is;
 }
 
 template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt, month_day& md,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{};
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.ymd.month().ok() || !fds.ymd.day().ok())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        md = fds.ymd.month()/fds.ymd.day();
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, month_day& md,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = std::chrono::seconds;
+  fields<CT> fds{};
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.ymd.month().ok() || !fds.ymd.day().ok()) is.setstate(std::ios::failbit);
+  if (!is.fail()) md = fds.ymd.month() / fds.ymd.day();
+  return is;
 }
 
 template <class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            year_month_day& ymd, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = std::chrono::seconds;
-    fields<CT> fds{};
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.ymd.ok())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        ymd = fds.ymd;
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, year_month_day& ymd,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = std::chrono::seconds;
+  fields<CT> fds{};
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.ymd.ok()) is.setstate(std::ios::failbit);
+  if (!is.fail()) ymd = fds.ymd;
+  return is;
 }
 
 template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            sys_time<Duration>& tp, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-    std::chrono::minutes offset_local{};
-    auto offptr = offset ? offset : &offset_local;
-    fields<CT> fds{};
-    fds.has_tod = true;
-    from_stream(is, fmt, fds, abbrev, offptr);
-    if (!fds.ymd.ok() || !fds.tod.in_conventional_range())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        tp = round<Duration>(sys_days(fds.ymd) - *offptr + fds.tod.to_duration());
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, sys_time<Duration>& tp,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+  std::chrono::minutes offset_local{};
+  auto offptr = offset ? offset : &offset_local;
+  fields<CT> fds{};
+  fds.has_tod = true;
+  from_stream(is, fmt, fds, abbrev, offptr);
+  if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) is.setstate(std::ios::failbit);
+  if (!is.fail())
+    tp = round<Duration>(sys_days(fds.ymd) - *offptr + fds.tod.to_duration());
+  return is;
 }
 
 template <class Duration, class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            local_time<Duration>& tp, std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-    fields<CT> fds{};
-    fds.has_tod = true;
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.ymd.ok() || !fds.tod.in_conventional_range())
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        tp = round<Duration>(local_seconds{local_days(fds.ymd)} + fds.tod.to_duration());
-    return is;
-}
-
-template <class Rep, class Period, class CharT, class Traits, class Alloc = std::allocator<CharT>>
-std::basic_istream<CharT, Traits>&
-from_stream(std::basic_istream<CharT, Traits>& is, const CharT* fmt,
-            std::chrono::duration<Rep, Period>& d,
-            std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-            std::chrono::minutes* offset = nullptr)
-{
-    using Duration = std::chrono::duration<Rep, Period>;
-    using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
-    fields<CT> fds{};
-    from_stream(is, fmt, fds, abbrev, offset);
-    if (!fds.has_tod)
-        is.setstate(std::ios::failbit);
-    if (!is.fail())
-        d = std::chrono::duration_cast<Duration>(fds.tod.to_duration());
-    return is;
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt, local_time<Duration>& tp,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+  fields<CT> fds{};
+  fds.has_tod = true;
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.ymd.ok() || !fds.tod.in_conventional_range()) is.setstate(std::ios::failbit);
+  if (!is.fail())
+    tp = round<Duration>(local_seconds{local_days(fds.ymd)} + fds.tod.to_duration());
+  return is;
+}
+
+template <class Rep, class Period, class CharT, class Traits,
+          class Alloc = std::allocator<CharT>>
+std::basic_istream<CharT, Traits>& from_stream(
+    std::basic_istream<CharT, Traits>& is, const CharT* fmt,
+    std::chrono::duration<Rep, Period>& d,
+    std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+    std::chrono::minutes* offset = nullptr) {
+  using Duration = std::chrono::duration<Rep, Period>;
+  using CT = typename std::common_type<Duration, std::chrono::seconds>::type;
+  fields<CT> fds{};
+  from_stream(is, fmt, fds, abbrev, offset);
+  if (!fds.has_tod) is.setstate(std::ios::failbit);
+  if (!is.fail()) d = std::chrono::duration_cast<Duration>(fds.tod.to_duration());
+  return is;
 }
 
 template <class Parsable, class CharT, class Traits = std::char_traits<CharT>,
           class Alloc = std::allocator<CharT>>
-struct parse_manip
-{
-    const std::basic_string<CharT, Traits, Alloc> format_;
-    Parsable&                                     tp_;
-    std::basic_string<CharT, Traits, Alloc>*      abbrev_;
-    std::chrono::minutes*                         offset_;
-
-public:
-    parse_manip(std::basic_string<CharT, Traits, Alloc> format, Parsable& tp,
-                std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
-                std::chrono::minutes* offset = nullptr)
-        : format_(std::move(format))
-        , tp_(tp)
-        , abbrev_(abbrev)
-        , offset_(offset)
-        {}
-
+struct parse_manip {
+  const std::basic_string<CharT, Traits, Alloc> format_;
+  Parsable& tp_;
+  std::basic_string<CharT, Traits, Alloc>* abbrev_;
+  std::chrono::minutes* offset_;
+
+ public:
+  parse_manip(std::basic_string<CharT, Traits, Alloc> format, Parsable& tp,
+              std::basic_string<CharT, Traits, Alloc>* abbrev = nullptr,
+              std::chrono::minutes* offset = nullptr)
+      : format_(std::move(format)), tp_(tp), abbrev_(abbrev), offset_(offset) {}
 };
 
 template <class Parsable, class CharT, class Traits, class Alloc>
-std::basic_istream<CharT, Traits>&
-operator>>(std::basic_istream<CharT, Traits>& is,
-           const parse_manip<Parsable, CharT, Traits, Alloc>& x)
-{
-    return from_stream(is, x.format_.c_str(), x.tp_, x.abbrev_, x.offset_);
+std::basic_istream<CharT, Traits>& operator>>(
+    std::basic_istream<CharT, Traits>& is,
+    const parse_manip<Parsable, CharT, Traits, Alloc>& x) {
+  return from_stream(is, x.format_.c_str(), x.tp_, x.abbrev_, x.offset_);
 }
 
 template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp)
+inline auto parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp)
     -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
                             format.c_str(), tp),
-                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp})
-{
-    return {format, tp};
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp}) {
+  return {format, tp};
 }
 
 template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
-      std::basic_string<CharT, Traits, Alloc>& abbrev)
+inline auto parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
+                  std::basic_string<CharT, Traits, Alloc>& abbrev)
     -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
                             format.c_str(), tp, &abbrev),
-                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev})
-{
-    return {format, tp, &abbrev};
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev}) {
+  return {format, tp, &abbrev};
 }
 
 template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
-      std::chrono::minutes& offset)
+inline auto parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
+                  std::chrono::minutes& offset)
     -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
                             format.c_str(), tp,
                             std::declval<std::basic_string<CharT, Traits, Alloc>*>(),
                             &offset),
-                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, nullptr, &offset})
-{
-    return {format, tp, nullptr, &offset};
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, nullptr,
+                                                            &offset}) {
+  return {format, tp, nullptr, &offset};
 }
 
 template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
-      std::basic_string<CharT, Traits, Alloc>& abbrev, std::chrono::minutes& offset)
+inline auto parse(const std::basic_string<CharT, Traits, Alloc>& format, Parsable& tp,
+                  std::basic_string<CharT, Traits, Alloc>& abbrev,
+                  std::chrono::minutes& offset)
     -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(),
                             format.c_str(), tp, &abbrev, &offset),
-                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev, &offset})
-{
-    return {format, tp, &abbrev, &offset};
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev,
+                                                            &offset}) {
+  return {format, tp, &abbrev, &offset};
 }
 
 // const CharT* formats
 
 template <class Parsable, class CharT>
-inline
-auto
-parse(const CharT* format, Parsable& tp)
+inline auto parse(const CharT* format, Parsable& tp)
     -> decltype(from_stream(std::declval<std::basic_istream<CharT>&>(), format, tp),
-                parse_manip<Parsable, CharT>{format, tp})
-{
-    return {format, tp};
+                parse_manip<Parsable, CharT>{format, tp}) {
+  return {format, tp};
 }
 
 template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const CharT* format, Parsable& tp, std::basic_string<CharT, Traits, Alloc>& abbrev)
+inline auto parse(const CharT* format, Parsable& tp,
+                  std::basic_string<CharT, Traits, Alloc>& abbrev)
     -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(), format,
                             tp, &abbrev),
-                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev})
-{
-    return {format, tp, &abbrev};
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev}) {
+  return {format, tp, &abbrev};
 }
 
 template <class Parsable, class CharT>
-inline
-auto
-parse(const CharT* format, Parsable& tp, std::chrono::minutes& offset)
-    -> decltype(from_stream(std::declval<std::basic_istream<CharT>&>(), format,
-                            tp, std::declval<std::basic_string<CharT>*>(), &offset),
-                parse_manip<Parsable, CharT>{format, tp, nullptr, &offset})
-{
-    return {format, tp, nullptr, &offset};
+inline auto parse(const CharT* format, Parsable& tp, std::chrono::minutes& offset)
+    -> decltype(from_stream(std::declval<std::basic_istream<CharT>&>(), format, tp,
+                            std::declval<std::basic_string<CharT>*>(), &offset),
+                parse_manip<Parsable, CharT>{format, tp, nullptr, &offset}) {
+  return {format, tp, nullptr, &offset};
 }
 
 template <class Parsable, class CharT, class Traits, class Alloc>
-inline
-auto
-parse(const CharT* format, Parsable& tp,
-      std::basic_string<CharT, Traits, Alloc>& abbrev, std::chrono::minutes& offset)
+inline auto parse(const CharT* format, Parsable& tp,
+                  std::basic_string<CharT, Traits, Alloc>& abbrev,
+                  std::chrono::minutes& offset)
     -> decltype(from_stream(std::declval<std::basic_istream<CharT, Traits>&>(), format,
                             tp, &abbrev, &offset),
-                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev, &offset})
-{
-    return {format, tp, &abbrev, &offset};
+                parse_manip<Parsable, CharT, Traits, Alloc>{format, tp, &abbrev,
+                                                            &offset}) {
+  return {format, tp, &abbrev, &offset};
 }
 
 // duration streaming
 
 template <class CharT, class Traits, class Rep, class Period>
-inline
-std::basic_ostream<CharT, Traits>&
-operator<<(std::basic_ostream<CharT, Traits>& os,
-           const std::chrono::duration<Rep, Period>& d)
-{
-    return os << detail::make_string<CharT, Traits>::from(d.count()) +
-                 detail::get_units<CharT>(typename Period::type{});
+inline std::basic_ostream<CharT, Traits>& operator<<(
+    std::basic_ostream<CharT, Traits>& os, const std::chrono::duration<Rep, Period>& d) {
+  return os << detail::make_string<CharT, Traits>::from(d.count()) +
+                   detail::get_units<CharT>(typename Period::type{});
 }
 
 }  // namespace date
 }  // namespace arrow_vendored
 
 #ifdef _MSC_VER
-#   pragma warning(pop)
+#pragma warning(pop)
 #endif
 
 #ifdef __GNUC__
-# pragma GCC diagnostic pop
+#pragma GCC diagnostic pop
 #endif
 
 #endif  // DATE_H
diff --git a/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.cc b/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.cc
index e03c4d630..781914556 100644
--- a/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.cc
+++ b/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.cc
@@ -29,10 +29,11 @@
 #include "gandiva/decimal_xlarge.h"
 #include "gandiva/gdv_function_stubs.h"
 
-// Several operations (multiply, divide, mod, ..) require converting to 256-bit, and we
-// use the boost library for doing 256-bit operations. To avoid references to boost from
-// the precompiled-to-ir code (this causes issues with symbol resolution at runtime), we
-// use a wrapper exported from the CPP code. The wrapper functions are named gdv_xlarge_xx
+// Several operations (multiply, divide, mod, ..) require converting to 256-bit,
+// and we use the boost library for doing 256-bit operations. To avoid
+// references to boost from the precompiled-to-ir code (this causes issues with
+// symbol resolution at runtime), we use a wrapper exported from the CPP code.
+// The wrapper functions are named gdv_xlarge_xx
 
 namespace gandiva {
 namespace decimalops {
@@ -98,7 +99,8 @@ static BasicDecimal128 AddLargePositive(const BasicDecimalScalar128& x,
   return (left * BasicDecimal128::GetScaleMultiplier(out_scale)) + right;
 }
 
-/// x_value and y_value cannot be 0, and one must be positive and the other negative.
+/// x_value and y_value cannot be 0, and one must be positive and the other
+/// negative.
 static BasicDecimal128 AddLargeNegative(const BasicDecimalScalar128& x,
                                         const BasicDecimalScalar128& y,
                                         int32_t out_scale) {
@@ -120,9 +122,9 @@ static BasicDecimal128 AddLargeNegative(const BasicDecimalScalar128& x,
   auto left = x_left + y_left;
   auto right = x_right + y_right;
 
-  // If the whole and fractional parts have different signs, then we need to make the
-  // fractional part have the same sign as the whole part. If either left or right is
-  // zero, then nothing needs to be done.
+  // If the whole and fractional parts have different signs, then we need to
+  // make the fractional part have the same sign as the whole part. If either
+  // left or right is zero, then nothing needs to be done.
   if (left < 0 && right > 0) {
     left += 1;
     right -= BasicDecimal128::GetScaleMultiplier(higher_scale);
@@ -150,9 +152,9 @@ static BasicDecimal128 AddLarge(const BasicDecimalScalar128& x,
   }
 }
 
-// Suppose we have a number that requires x bits to be represented and we scale it up by
-// 10^scale_by. Let's say now y bits are required to represent it. This function returns
-// the maximum possible y - x for a given 'scale_by'.
+// Suppose we have a number that requires x bits to be represented and we scale
+// it up by 10^scale_by. Let's say now y bits are required to represent it. This
+// function returns the maximum possible y - x for a given 'scale_by'.
 inline int32_t MaxBitsRequiredIncreaseAfterScaling(int32_t scale_by) {
   // We rely on the following formula:
   // bits_required(x * 10^y) <= bits_required(x) + floor(log2(10^y)) + 1
@@ -168,8 +170,9 @@ inline int32_t MaxBitsRequiredIncreaseAfterScaling(int32_t scale_by) {
   return floor_log2_plus_one[scale_by];
 }
 
-// If we have a number with 'num_lz' leading zeros, and we scale it up by 10^scale_by,
-// this function returns the minimum number of leading zeros the result can have.
+// If we have a number with 'num_lz' leading zeros, and we scale it up by
+// 10^scale_by, this function returns the minimum number of leading zeros the
+// result can have.
 inline int32_t MinLeadingZerosAfterScaling(int32_t num_lz, int32_t scale_by) {
   DCHECK_GE(scale_by, 0);
   DCHECK_LE(scale_by, 76);
@@ -177,7 +180,8 @@ inline int32_t MinLeadingZerosAfterScaling(int32_t num_lz, int32_t scale_by) {
   return result;
 }
 
-// Returns the maximum possible number of bits required to represent num * 10^scale_by.
+// Returns the maximum possible number of bits required to represent num *
+// 10^scale_by.
 inline int32_t MaxBitsRequiredAfterScaling(const BasicDecimalScalar128& num,
                                            int32_t scale_by) {
   auto value = num.value();
@@ -189,8 +193,8 @@ inline int32_t MaxBitsRequiredAfterScaling(const BasicDecimalScalar128& num,
   return num_occupied + MaxBitsRequiredIncreaseAfterScaling(scale_by);
 }
 
-// Returns the minimum number of leading zero x or y would have after one of them gets
-// scaled up to match the scale of the other one.
+// Returns the minimum number of leading zero x or y would have after one of
+// them gets scaled up to match the scale of the other one.
 inline int32_t MinLeadingZeros(const BasicDecimalScalar128& x,
                                const BasicDecimalScalar128& y) {
   auto x_value = x.value();
@@ -217,15 +221,16 @@ BasicDecimal128 Add(const BasicDecimalScalar128& x, const BasicDecimalScalar128&
   } else {
     int32_t min_lz = MinLeadingZeros(x, y);
     if (min_lz >= 3) {
-      // If both numbers have at least MIN_LZ leading zeros, we can add them directly
-      // without the risk of overflow.
-      // We want the result to have at least 2 leading zeros, which ensures that it fits
-      // into the maximum decimal because 2^126 - 1 < 10^38 - 1. If both x and y have at
-      // least 3 leading zeros, then we are guaranteed that the result will have at lest 2
-      // leading zeros.
+      // If both numbers have at least MIN_LZ leading zeros, we can add them
+      // directly without the risk of overflow. We want the result to have at
+      // least 2 leading zeros, which ensures that it fits into the maximum
+      // decimal because 2^126 - 1 < 10^38 - 1. If both x and y have at least 3
+      // leading zeros, then we are guaranteed that the result will have at least
+      // 2 leading zeros.
       return AddNoOverflow(x, y, out_scale);
     } else {
-      // slower-version : add whole/fraction parts separately, and then, combine.
+      // slower-version : add whole/fraction parts separately, and then,
+      // combine.
       return AddLarge(x, y, out_scale);
     }
   }
@@ -236,8 +241,8 @@ BasicDecimal128 Subtract(const BasicDecimalScalar128& x, const BasicDecimalScala
   return Add(x, {-y.value(), y.precision(), y.scale()}, out_precision, out_scale);
 }
 
-// Multiply when the out_precision is 38, and there is no trimming of the scale i.e
-// the intermediate value is the same as the final value.
+// Multiply when the out_precision is 38, and there is no trimming of the scale
+// i.e the intermediate value is the same as the final value.
 static BasicDecimal128 MultiplyMaxPrecisionNoScaleDown(const BasicDecimalScalar128& x,
                                                        const BasicDecimalScalar128& y,
                                                        int32_t out_scale,
@@ -284,31 +289,33 @@ static BasicDecimal128 MultiplyMaxPrecisionAndScaleDown(const BasicDecimalScalar
     int64_t result_high;
     uint64_t result_low;
 
-    // This requires converting to 256-bit, and we use the boost library for that. To
-    // avoid references to boost from the precompiled-to-ir code (this causes issues
-    // with symbol resolution at runtime), we use a wrapper exported from the CPP code.
+    // This requires converting to 256-bit, and we use the boost library for
+    // that. To avoid references to boost from the precompiled-to-ir code (this
+    // causes issues with symbol resolution at runtime), we use a wrapper
+    // exported from the CPP code.
     gdv_xlarge_multiply_and_scale_down(x.value().high_bits(), x.value().low_bits(),
                                        y.value().high_bits(), y.value().low_bits(),
                                        delta_scale, &result_high, &result_low, overflow);
     result = BasicDecimal128(result_high, result_low);
   } else {
     if (ARROW_PREDICT_TRUE(delta_scale <= 38)) {
-      // The largest value that result can have here is (2^64 - 1) * (2^63 - 1), which is
-      // greater than BasicDecimal128::kMaxValue.
+      // The largest value that result can have here is (2^64 - 1) * (2^63 - 1),
+      // which is greater than BasicDecimal128::kMaxValue.
       result = x.value() * y.value();
       // Since delta_scale is greater than zero, result can now be at most
-      // ((2^64 - 1) * (2^63 - 1)) / 10, which is less than BasicDecimal128::kMaxValue, so
-      // there cannot be any overflow.
+      // ((2^64 - 1) * (2^63 - 1)) / 10, which is less than
+      // BasicDecimal128::kMaxValue, so there cannot be any overflow.
       result = result.ReduceScaleBy(delta_scale);
     } else {
-      // We are multiplying decimal(38, 38) by decimal(38, 38). The result should be a
-      // decimal(38, 37), so delta scale = 38 + 38 - 37 = 39. Since we are not in the
-      // 256 bit intermediate value case and we are scaling down by 39, then we are
-      // guaranteed that the result is 0 (even if we try to round). The largest possible
-      // intermediate result is 38 "9"s. If we scale down by 39, the leftmost 9 is now
-      // two digits to the right of the rightmost "visible" one. The reason why we have
-      // to handle this case separately is because a scale multiplier with a delta_scale
-      // 39 does not fit into 128 bit.
+      // We are multiplying decimal(38, 38) by decimal(38, 38). The result
+      // should be a decimal(38, 37), so delta scale = 38 + 38 - 37 = 39. Since
+      // we are not in the 256 bit intermediate value case and we are scaling
+      // down by 39, then we are guaranteed that the result is 0 (even if we try
+      // to round). The largest possible intermediate result is 38 "9"s. If we
+      // scale down by 39, the leftmost 9 is now two digits to the right of the
+      // rightmost "visible" one. The reason why we have to handle this case
+      // separately is because a scale multiplier with a delta_scale 39 does not
+      // fit into 128 bit.
       DCHECK_EQ(delta_scale, 39);
       result = 0;
     }
@@ -557,7 +564,8 @@ enum RoundType {
   kRoundTypeFloor,        // -1 if -ve and trailing value is < 0, else no rounding.
   kRoundTypeTrunc,        // no rounding, truncate the trailing digits.
   kRoundTypeHalfRoundUp,  // if +ve and trailing value is >= half of base, +1.
-                          // else if -ve and trailing value is >= half of base, -1.
+                          // else if -ve and trailing value is >= half of base,
+                          // -1.
 };
 
 // Compute the rounding delta for the givven rounding type.
@@ -627,10 +635,10 @@ static BasicDecimal128 RoundWithPositiveScale(const BasicDecimalScalar128& x,
     return scaled;
   }
 
-  // If there is a rounding delta, the output scale must be less than the input scale.
-  // That means at least one digit is dropped after the decimal. The delta add can add
-  // utmost one digit before the decimal. So, overflow will occur only if the output
-  // precision has changed.
+  // If there is a rounding delta, the output scale must be less than the input
+  // scale. That means at least one digit is dropped after the decimal. The
+  // delta add can add utmost one digit before the decimal. So, overflow will
+  // occur only if the output precision has changed.
   DCHECK_GT(x.scale(), out_scale);
   auto result = scaled + delta;
   DECIMAL_OVERFLOW_IF(out_precision < x.precision() &&
@@ -640,7 +648,8 @@ static BasicDecimal128 RoundWithPositiveScale(const BasicDecimalScalar128& x,
 }
 
 // Modify scale to drop all digits to the right of the decimal and round.
-// Then, zero out 'rounding_scale' number of digits to the left of the decimal point.
+// Then, zero out 'rounding_scale' number of digits to the left of the decimal
+// point.
 static BasicDecimal128 RoundWithNegativeScale(const BasicDecimalScalar128& x,
                                               int32_t out_precision,
                                               int32_t rounding_scale,
diff --git a/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.h b/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.h
index 5a4e50bba..1b778b811 100644
--- a/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.h
+++ b/native-sql-engine/cpp/src/third_party/gandiva/decimal_ops.h
@@ -20,6 +20,7 @@
 
 #include <cstdint>
 #include <string>
+
 #include "gandiva/basic_decimal_scalar.h"
 
 namespace gandiva {
diff --git a/native-sql-engine/cpp/src/third_party/gandiva/time.cc b/native-sql-engine/cpp/src/third_party/gandiva/time.cc
index 882338b27..a6873f831 100644
--- a/native-sql-engine/cpp/src/third_party/gandiva/time.cc
+++ b/native-sql-engine/cpp/src/third_party/gandiva/time.cc
@@ -180,9 +180,8 @@ int getJanWeekOfYear(const EpochTimePoint& tp) {
 
   if (jan1_wday == 5) {
     // Jan 1 is a Fri
-    // Jan 1-3 belong to previous year. Dec 31 of previous year same week # as Jan 1-3
-    // previous year is a leap year:
-    // Prev Jan 1 is a Wed. Jan 6th is Mon
+    // Jan 1-3 belong to previous year. Dec 31 of previous year same week # as Jan 1-3.
+    // Previous year is a leap year: Prev Jan 1 is a Wed. Jan 6th is Mon.
     // Dec 31 - Jan 6 = 366 - 5 = 361
     // week from Jan 6 = (361 - 1) / 7 + 1 = 52
     // week # in previous year = 52 + 1 = 53
@@ -270,14 +269,15 @@ int getDecWeekOfYear(const EpochTimePoint& tp) {
 //
 // Important points to note:
 // Week starts with a Monday and ends with a Sunday
-// A week can have some days in this year and some days in the previous/next year
-// This is true for the first and last weeks
+// A week can have some days in this year and some days in the previous/next
+// year. This is true for the first and last weeks.
 //
 // The first week of the year should have at-least 4 days in the current year
 // The last week of the year should have at-least 4 days in the current year
 //
-// A given day might belong to the first week of the next year - e.g Dec 29, 30 and 31
-// A given day might belong to the last week of the previous year - e.g. Jan 1, 2 and 3
+// A given day might belong to the first week of the next year - e.g. Dec 29,
+// 30 and 31. A given day might belong to the last week of the previous year -
+// e.g. Jan 1, 2 and 3.
 //
 // Algorithm:
 // If day belongs to week in current year, weekOfCurrentYear
@@ -485,10 +485,10 @@ bool IsLastDayOfMonth(const EpochTimePoint& tp) {
 // MONTHS_BETWEEN returns number of months between dates date1 and date2.
 // If date1 is later than date2, then the result is positive.
 // If date1 is earlier than date2, then the result is negative.
-// If date1 and date2 are either the same days of the month or both last days of months,
-// then the result is always an integer. Otherwise Oracle Database calculates the
-// fractional portion of the result based on a 31-day month and considers the difference
-// in time components date1 and date2
+// If date1 and date2 are either the same days of the month or both last days of
+// months, then the result is always an integer. Otherwise Oracle Database
+// calculates the fractional portion of the result based on a 31-day month and
+// considers the difference in time components date1 and date2
 #define MONTHS_BETWEEN(TYPE)                                                        \
   FORCE_INLINE                                                                      \
   double months_between##_##TYPE##_##TYPE(uint64_t endEpoch, uint64_t startEpoch) { \
@@ -631,7 +631,8 @@ const char* castVARCHAR_date32_int64(gdv_int64 context, gdv_date32 in_day,
  * Input consists of mandatory and optional fields.
  * Mandatory fields are year, month and day.
  * Optional fields are time, displacement and zone.
- * Format is <year-month-day>[ hours:minutes:seconds][.millis][ displacement|zone]
+ * Format is:
+ * <year-month-day>[ hours:minutes:seconds][.millis][ displacement|zone]
  */
 gdv_timestamp castTIMESTAMP_utf8(int64_t context, const char* input, gdv_int32 length) {
   using arrow_vendored::date::day;
diff --git a/native-sql-engine/cpp/src/third_party/gandiva/types.h b/native-sql-engine/cpp/src/third_party/gandiva/types.h
index 57df2a0a9..71059e414 100644
--- a/native-sql-engine/cpp/src/third_party/gandiva/types.h
+++ b/native-sql-engine/cpp/src/third_party/gandiva/types.h
@@ -21,7 +21,8 @@
 
 #include "gandiva/gdv_function_stubs.h"
 
-// Use the same names as in arrow data types. Makes it easy to write pre-processor macros.
+// Use the same names as in arrow data types. Makes it easy to write
+// pre-processor macros.
 using gdv_boolean = bool;
 using gdv_int8 = int8_t;
 using gdv_int16 = int16_t;
diff --git a/native-sql-engine/cpp/src/third_party/murmurhash/murmurhash32.h b/native-sql-engine/cpp/src/third_party/murmurhash/murmurhash32.h
index fcfb7ac47..9fe5179ca 100644
--- a/native-sql-engine/cpp/src/third_party/murmurhash/murmurhash32.h
+++ b/native-sql-engine/cpp/src/third_party/murmurhash/murmurhash32.h
@@ -14,32 +14,33 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+#pragma once
 #include <arrow/type_fwd.h>
 #include <arrow/util/decimal.h>
 #include <string.h>
 
-#include "arrow/util/string_view.h" // IWYU pragma: export
+#include "arrow/util/string_view.h"  // IWYU pragma: export
 
 namespace sparkcolumnarplugin {
 namespace thirdparty {
 namespace murmurhash32 {
 
-template <typename T> using is_int64 = std::is_same<int64_t, T>;
+template <typename T>
+using is_int64 = std::is_same<int64_t, T>;
 
 template <typename T>
-using enable_if_int64 =
-    typename std::enable_if<is_int64<T>::value, int32_t>::type;
+using enable_if_int64 = typename std::enable_if<is_int64<T>::value, int32_t>::type;
 
-template <typename T> using is_string = std::is_same<std::string, T>;
+template <typename T>
+using is_string = std::is_same<std::string, T>;
 
 template <typename T>
-using enable_if_string =
-    typename std::enable_if<is_string<T>::value, int32_t>::type;
+using enable_if_string = typename std::enable_if<is_string<T>::value, int32_t>::type;
 
 template <typename T>
 using enable_if_decimal =
-    typename std::enable_if<std::is_same<T, arrow::Decimal128>::value,
-                            int32_t>::type;
+    typename std::enable_if<std::is_same<T, arrow::Decimal128>::value, int32_t>::type;
 template <typename T>
 using is_string_or_decimal =
     std::integral_constant<bool, is_string<T>::value ||
@@ -70,7 +71,8 @@ inline int64_t fmix64(int64_t k) {
   return k;
 }
 
-template <typename T> inline enable_if_int64<T> hash32(T val, int32_t seed) {
+template <typename T>
+inline enable_if_int64<T> hash32(T val, int32_t seed) {
   int64_t c1 = 0xcc9e2d51ull;
   int64_t c2 = 0x1b873593ull;
   int length = 8;
@@ -107,17 +109,16 @@ template <typename T> inline enable_if_int64<T> hash32(T val, int32_t seed) {
 
 template <typename T>
 inline enable_if_string<T> hash32(T val, bool validity, int32_t seed) {
-  if (!validity)
-    return seed;
+  if (!validity) return seed;
   auto key = val.data();
   auto len = val.length();
   const int64_t c1 = 0xcc9e2d51ull;
   const int64_t c2 = 0x1b873593ull;
   const int64_t UINT_MASK = 0xffffffffull;
   int64_t lh1 = seed;
-  const int32_t *blocks = reinterpret_cast<const int32_t *>(key);
+  const int32_t* blocks = reinterpret_cast<const int32_t*>(key);
   int nblocks = len / 4;
-  const uint8_t *tail = reinterpret_cast<const uint8_t *>(key + nblocks * 4);
+  const uint8_t* tail = reinterpret_cast<const uint8_t*>(key + nblocks * 4);
   for (int i = 0; i < nblocks; i++) {
     int64_t lk1 = static_cast<int64_t>(blocks[i]);
 
@@ -140,20 +141,20 @@ inline enable_if_string<T> hash32(T val, bool validity, int32_t seed) {
   int64_t lk1 = 0;
 
   switch (len & 3) {
-  case 3:
-    lk1 = (tail[2] & 0xff) << 16;
-  case 2:
-    lk1 |= (tail[1] & 0xff) << 8;
-  case 1:
-    lk1 |= (tail[0] & 0xff);
-    lk1 *= c1;
-    lk1 = UINT_MASK & lk1;
-    lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
-
-    lk1 *= c2;
-    lk1 = lk1 & UINT_MASK;
-
-    lh1 ^= lk1;
+    case 3:
+      lk1 = (tail[2] & 0xff) << 16;
+    case 2:
+      lk1 |= (tail[1] & 0xff) << 8;
+    case 1:
+      lk1 |= (tail[0] & 0xff);
+      lk1 *= c1;
+      lk1 = UINT_MASK & lk1;
+      lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+      lk1 *= c2;
+      lk1 = lk1 & UINT_MASK;
+
+      lh1 ^= lk1;
   }
 
   // finalization
@@ -171,14 +172,14 @@ inline enable_if_string<T> hash32(T val, bool validity, int32_t seed) {
   return static_cast<int32_t>(lh1 & UINT_MASK);
 }
 
-template <typename T> inline enable_if_string<T> hash32(T val, bool validity) {
+template <typename T>
+inline enable_if_string<T> hash32(T val, bool validity) {
   return hash32(val, validity, 0);
 }
 
 template <typename T>
 inline enable_if_decimal<T> hash32(T val, bool validity, int32_t seed) {
-  if (!validity)
-    return seed;
+  if (!validity) return seed;
   auto arr = val.ToBytes();
   auto key = arr.data();
   auto len = arr.size();
@@ -186,9 +187,9 @@ inline enable_if_decimal<T> hash32(T val, bool validity, int32_t seed) {
   const int64_t c2 = 0x1b873593ull;
   const int64_t UINT_MASK = 0xffffffffull;
   int64_t lh1 = seed;
-  const int32_t *blocks = reinterpret_cast<const int32_t *>(key);
+  const int32_t* blocks = reinterpret_cast<const int32_t*>(key);
   int nblocks = len / 4;
-  const uint8_t *tail = reinterpret_cast<const uint8_t *>(key + nblocks * 4);
+  const uint8_t* tail = reinterpret_cast<const uint8_t*>(key + nblocks * 4);
   for (int i = 0; i < nblocks; i++) {
     int64_t lk1 = static_cast<int64_t>(blocks[i]);
 
@@ -211,20 +212,20 @@ inline enable_if_decimal<T> hash32(T val, bool validity, int32_t seed) {
   int64_t lk1 = 0;
 
   switch (len & 3) {
-  case 3:
-    lk1 = (tail[2] & 0xff) << 16;
-  case 2:
-    lk1 |= (tail[1] & 0xff) << 8;
-  case 1:
-    lk1 |= (tail[0] & 0xff);
-    lk1 *= c1;
-    lk1 = UINT_MASK & lk1;
-    lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
-
-    lk1 *= c2;
-    lk1 = lk1 & UINT_MASK;
-
-    lh1 ^= lk1;
+    case 3:
+      lk1 = (tail[2] & 0xff) << 16;
+    case 2:
+      lk1 |= (tail[1] & 0xff) << 8;
+    case 1:
+      lk1 |= (tail[0] & 0xff);
+      lk1 *= c1;
+      lk1 = UINT_MASK & lk1;
+      lk1 = ((lk1 << 15) & UINT_MASK) | (lk1 >> 17);
+
+      lk1 *= c2;
+      lk1 = lk1 & UINT_MASK;
+
+      lh1 ^= lk1;
   }
 
   // finalization
@@ -242,7 +243,8 @@ inline enable_if_decimal<T> hash32(T val, bool validity, int32_t seed) {
   return static_cast<int32_t>(lh1 & UINT_MASK);
 }
 
-template <typename T> inline enable_if_decimal<T> hash32(T val, bool validity) {
+template <typename T>
+inline enable_if_decimal<T> hash32(T val, bool validity) {
   return hash32(val, validity, 0);
 }
 
@@ -253,10 +255,8 @@ inline int64_t double_to_long_bits(double value) {
 }
 
 template <typename T>
-inline enable_if_not_string_or_decimal<T> hash32(T in, bool validity,
-                                                 int32_t seed) {
-  return validity ? hash32(double_to_long_bits(static_cast<double>(in)), seed)
-                  : seed;
+inline enable_if_not_string_or_decimal<T> hash32(T in, bool validity, int32_t seed) {
+  return validity ? hash32(double_to_long_bits(static_cast<double>(in)), seed) : seed;
 }
 
 template <typename T>
@@ -266,6 +266,6 @@ inline enable_if_not_string_or_decimal<T> hash32(T in, bool validity) {
 
 // Wrappers for the varlen types
 
-} // namespace murmurhash32
-} // namespace thirdparty
-} // namespace sparkcolumnarplugin
+}  // namespace murmurhash32
+}  // namespace thirdparty
+}  // namespace sparkcolumnarplugin
diff --git a/native-sql-engine/cpp/src/third_party/row_wise_memory/hashMap.h b/native-sql-engine/cpp/src/third_party/row_wise_memory/hashMap.h
old mode 100755
new mode 100644
index ff2ffcdfd..f74436eeb
--- a/native-sql-engine/cpp/src/third_party/row_wise_memory/hashMap.h
+++ b/native-sql-engine/cpp/src/third_party/row_wise_memory/hashMap.h
@@ -20,6 +20,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <iostream>
+
 #include "codegen/arrow_compute/ext/array_item_index.h"
 #include "third_party/row_wise_memory/unsafe_row.h"
 
@@ -40,10 +42,10 @@ using sparkcolumnarplugin::codegen::arrowcompute::extra::ArrayItemIndex;
  * | key-hash(4 bytes) | bytesMap offset(4 bytes) |
  *
  * BytesMap: map to store key and value data
- * each item has format as below, same key items will be linked (Min size is 8 bytes when
- * key and value both 0)
- * | total-length(2 bytes) | key-length(2 bytes) | key data(variable-size) | value
- *data(variable-size) | next value ptr(4 bytes) |
+ * each item has format as below, same key items will be linked
+ * (Min size is 8 bytes when key and value both 0)
+ * | total-length(2 bytes) | key-length(2 bytes) | key data(variable-size) |
+ * value data(variable-size) | next value ptr(4 bytes) |
  *
  **/
 
@@ -127,7 +129,8 @@ static inline void dump(unsafeHashMap* hm) {
         printf("%04x  ", tmp);  // value_data
         i = value_length;
       } else {
-        printf("%04x  ", *(int*)(hm->bytesMap + pos + 4 + key_length + i));  // value_data
+        printf("%04x  ",
+               *(int*)(hm->bytesMap + pos + 4 + key_length + i));  // value_data
         i += 4;
       }
     }
@@ -156,7 +159,8 @@ static inline unsafeHashMap* createUnsafeHashMap(arrow::MemoryPool* pool,
   hashMap->arrayCapacity = initArrayCapacity;
   memset(hashMap->keyArray, -1, initArrayCapacity * bytesInKeyArray);
 
-  // hashMap->bytesMap = (char*)nativeMalloc(initialHashCapacity, MEMTYPE_HASHMAP);
+  // hashMap->bytesMap = (char*)nativeMalloc(initialHashCapacity,
+  // MEMTYPE_HASHMAP);
   pool->Allocate(initialHashCapacity, (uint8_t**)&hashMap->bytesMap);
   hashMap->mapSize = initialHashCapacity;
 
@@ -797,10 +801,10 @@ static inline bool append(unsafeHashMap* hashMap, CType keyRow, int hashVal, cha
   int keySizeInBytes = hashMap->bytesInKeyArray;
   char* keyArrayBase = hashMap->keyArray;
 
-  // chendi: Add a optimization here, use offset first bit to indicate if this offset is
-  // ArrayItemIndex or bytesmap offset
-  // if first key, it will be arrayItemIndex first bit is 0
-  // if multiple same key, it will be offset first bit is 1
+  // chendi: Add an optimization here: use the first bit of the offset to
+  // indicate whether it is an ArrayItemIndex or a bytesMap offset.
+  // If first key, it will be an ArrayItemIndex and the first bit is 0.
+  // If multiple same keys, it will be an offset and the first bit is 1.
 
   while (true) {
     int KeyAddressOffset = *(int*)(keyArrayBase + pos * keySizeInBytes);
@@ -911,10 +915,10 @@ static inline bool append(unsafeHashMap* hashMap, CType keyRow, int hashVal, cha
   int keySizeInBytes = hashMap->bytesInKeyArray;
   char* keyArrayBase = hashMap->keyArray;
 
-  // chendi: Add a optimization here, use offset first bit to indicate if this offset is
-  // ArrayItemIndex or bytesmap offset
-  // if first key, it will be arrayItemIndex first bit is 0
-  // if multiple same key, it will be offset first bit is 1
+  // chendi: Add an optimization here: use the first bit of the offset to
+  // indicate whether it is an ArrayItemIndex or a bytesMap offset.
+  // If first key, it will be an ArrayItemIndex and the first bit is 0.
+  // If multiple same keys, it will be an offset and the first bit is 1.
 
   while (true) {
     int KeyAddressOffset = *(int*)(keyArrayBase + pos * keySizeInBytes);
@@ -1087,3 +1091,300 @@ static inline bool append(unsafeHashMap* hashMap, const char* keyRow, size_t key
 
   return true;
 }
+
+/**
+ * Inserts keyRow together with its value only if the key is not yet in the map.
+ * A new key gets a record [int header | key | value | int 0] written at
+ * bytesMap + cursor, and its key-array slot stores that offset and hashVal.
+ * An existing equal key (same hash code, length and bytes) is left untouched.
+ * Returns true in both cases; a failed key-array growth only sets needSpill.
+ **/
+static inline bool appendNewKey(unsafeHashMap* hashMap, UnsafeRow* keyRow, int hashVal,
+                                char* value, size_t value_size) {
+  assert(hashMap->keyArray != NULL);
+
+  const int cursor = hashMap->cursor;
+  const int mask = hashMap->arrayCapacity - 1;
+  int pos = hashVal & mask;
+  int step = 1;
+
+  const int klen = keyRow->sizeInBytes();
+  const int vlen = value_size;
+  const int recordLength = 4 + klen + vlen + 4;
+  char* base = hashMap->bytesMap;
+  char* record = nullptr;
+
+  // Each key-array slot here is 8 bytes: int record offset, then int hash code.
+  int keySizeInBytes = 8;
+  char* keyArrayBase = hashMap->keyArray;
+
+  while (true) {
+    int KeyAddressOffset = *(int*)(keyArrayBase + pos * keySizeInBytes);
+    int keyHashCode = *(int*)(keyArrayBase + pos * keySizeInBytes + 4);
+
+    if (KeyAddressOffset < 0) {
+      // Empty slot: the key is new, so claim the slot and reserve the record.
+      // NOTE(review): nothing here checks cursor + recordLength against
+      // hashMap->mapSize; presumably the caller guarantees room - confirm.
+      record = base + cursor;
+      hashMap->numKeys++;
+      *(int*)(keyArrayBase + pos * keySizeInBytes) = cursor;
+      *(int*)(keyArrayBase + pos * keySizeInBytes + 4) = hashVal;
+      hashMap->cursor += recordLength;
+      break;
+    } else {
+      if ((int)keyHashCode == hashVal) {
+        // Full hash code matches.  Let's compare the keys for equality.
+        record = base + KeyAddressOffset;
+        if ((getKeyLength(record) == klen) &&
+            (memcmp(keyRow->data, getKeyFromBytesMap(record), klen) == 0)) {
+          return true;
+        }
+      }
+    }
+
+    pos = (pos + step) & mask;
+    step++;
+  }
+
+  // copy keyRow and valueRow into hashmap
+  assert((klen & 0xff00) == 0);
+  // Header packs (8 + klen + vlen) above bit 16 and klen in the low bits.
+  auto total_key_length = ((8 + klen + vlen) << 16) | klen;
+  *((int*)record) = total_key_length;
+  memcpy(record + 4, keyRow->data, klen);
+  memcpy(record + 4 + klen, value, vlen);
+  *((int*)(record + 4 + klen + vlen)) = 0;
+
+  // See if we need to grow keyArray
+  int growthThreshold = (int)(hashMap->arrayCapacity * loadFactor);
+  if ((hashMap->numKeys > growthThreshold) &&
+      (hashMap->arrayCapacity < MAX_HASH_MAP_CAPACITY)) {
+    if (!growAndRehashKeyArray(hashMap)) hashMap->needSpill = true;
+  }
+
+  return true;
+}
+
+/**
+ * Inserts a number-like key into the key array unless an equal key is already
+ * stored there.
+ *
+ * Slots are hashMap->bytesInKeyArray bytes wide and are accessed as an int
+ * payload at byte 0, an int hash code at byte 4 and the key itself at byte 8.
+ * Probing starts at (hashVal & mask); the stride grows by one after every
+ * occupied slot that holds a different key.
+ *  - A slot whose payload is -1 is empty: it is claimed by storing the first
+ *    int read from value, then hashVal, then keyRow; numKeys is incremented.
+ *  - A slot with the same hash code and an equal key is left untouched.
+ *
+ * Only the key array and numKeys are touched: nothing is written to bytesMap,
+ * cursor is not advanced, and the key array is never grown by this overload.
+ *
+ * NOTE(review): since no growth happens here, a key array that is full and
+ * does not contain keyRow makes the probe loop spin forever - presumably
+ * callers size the map up front; confirm.
+ *
+ * @tparam CType     key type; enabled only when is_number_alike<CType>::value
+ *                   is true
+ * @param hashMap    map whose key array is probed and, for a new key, updated
+ * @param keyRow     key value; compared with operator== and stored inline in
+ *                   the slot
+ * @param hashVal    hash code of keyRow; stored in the slot and used to pick
+ *                   the first probe position
+ * @param value      must point to at least sizeof(int) readable bytes; only
+ *                   that first int is stored, as the slot payload
+ * @param value_size not used by this overload
+ * @return always true
+ **/
+template <typename CType,
+          typename std::enable_if_t<is_number_alike<CType>::value>* = nullptr>
+static inline bool appendNewKey(unsafeHashMap* hashMap, CType keyRow, int hashVal,
+                                char* value, size_t value_size) {
+  assert(hashMap->keyArray != NULL);
+
+  // NOTE(review): masking with arrayCapacity - 1 presumes a power-of-two capacity.
+  const int mask = hashMap->arrayCapacity - 1;
+  const int keySizeInBytes = hashMap->bytesInKeyArray;
+  char* keyArrayBase = hashMap->keyArray;
+
+  int pos = hashVal & mask;
+  int step = 1;
+
+  while (true) {
+    // Slot fields: int payload at +0, int hash code at +4, CType key at +8.
+    char* slot = keyArrayBase + pos * keySizeInBytes;
+    const int KeyAddressOffset = *(int*)slot;
+    const int keyHashCode = *(int*)(slot + 4);
+
+    if (KeyAddressOffset == -1) {
+      // This is a new key: claim the empty slot. The payload is the first int
+      // of value; nothing is written to bytesMap.
+      hashMap->numKeys++;
+      *(int*)slot = *(int*)value;
+      *(int*)(slot + 4) = hashVal;
+      *(CType*)(slot + 8) = keyRow;
+      return true;
+    }
+
+    if ((keyHashCode == hashVal) && (keyRow == *(CType*)(slot + 8))) {
+      // Key already present: leave the existing entry untouched.
+      return true;
+    }
+
+    // Occupied by a different key: advance, growing the stride by one.
+    pos = (pos + step) & mask;
+    step++;
+  }
+
+  // No code follows the loop: it only leaves through the returns above. The
+  // record copy and load-factor growth check that used to sit here could never
+  // run (and would have written through a null record pointer).
+}
+
+/**
+ * Inserts a Decimal128 key into the key array unless an equal key is already
+ * stored there.
+ *
+ * Slots are hashMap->bytesInKeyArray bytes wide and are accessed as an int
+ * payload at byte 0, an int hash code at byte 4 and the key itself at byte 8.
+ * Probing starts at (hashVal & mask); the stride grows by one after every
+ * occupied slot that holds a different key.
+ *  - A slot whose payload is -1 is empty: it is claimed by storing the first
+ *    int read from value, then hashVal, then keyRow; numKeys is incremented.
+ *  - A slot with the same hash code and an equal key is left untouched.
+ *
+ * Only the key array and numKeys are touched: nothing is written to bytesMap,
+ * cursor is not advanced, and the key array is never grown by this overload.
+ *
+ * NOTE(review): since no growth happens here, a key array that is full and
+ * does not contain keyRow makes the probe loop spin forever - presumably
+ * callers size the map up front; confirm.
+ *
+ * @tparam CType     key type; this overload is enabled only for
+ *                   arrow::Decimal128
+ * @param hashMap    map whose key array is probed and, for a new key, updated
+ * @param keyRow     key value; compared with operator== and stored inline in
+ *                   the slot
+ * @param hashVal    hash code of keyRow; stored in the slot and used to pick
+ *                   the first probe position
+ * @param value      must point to at least sizeof(int) readable bytes; only
+ *                   that first int is stored, as the slot payload
+ * @param value_size not used by this overload
+ * @return always true
+ **/
+template <typename CType, typename std::enable_if_t<
+                              std::is_same<CType, arrow::Decimal128>::value>* = nullptr>
+static inline bool appendNewKey(unsafeHashMap* hashMap, CType keyRow, int hashVal,
+                                char* value, size_t value_size) {
+  assert(hashMap->keyArray != NULL);
+
+  // NOTE(review): masking with arrayCapacity - 1 presumes a power-of-two capacity.
+  const int mask = hashMap->arrayCapacity - 1;
+  const int keySizeInBytes = hashMap->bytesInKeyArray;
+  char* keyArrayBase = hashMap->keyArray;
+
+  int pos = hashVal & mask;
+  int step = 1;
+
+  while (true) {
+    // Slot fields: int payload at +0, int hash code at +4, CType key at +8.
+    char* slot = keyArrayBase + pos * keySizeInBytes;
+    const int KeyAddressOffset = *(int*)slot;
+    const int keyHashCode = *(int*)(slot + 4);
+
+    if (KeyAddressOffset == -1) {
+      // This is a new key: claim the empty slot. The payload is the first int
+      // of value; nothing is written to bytesMap.
+      hashMap->numKeys++;
+      *(int*)slot = *(int*)value;
+      *(int*)(slot + 4) = hashVal;
+      *(CType*)(slot + 8) = keyRow;
+      return true;
+    }
+
+    if ((keyHashCode == hashVal) && (keyRow == *(CType*)(slot + 8))) {
+      // Key already present: leave the existing entry untouched.
+      return true;
+    }
+
+    // Occupied by a different key: advance, growing the stride by one.
+    pos = (pos + step) & mask;
+    step++;
+  }
+
+  // No code follows the loop: it only leaves through the returns above. The
+  // record copy and load-factor growth check that used to sit here could never
+  // run (and would have written through a null record pointer).
+}
+
+/**
+ * Inserts the byte-string key (keyRow, keyLength) with its value only if the
+ * key is not yet in the map. A new key gets a record [int header | key | value
+ * | int 0] written at bytesMap + cursor; its slot stores that offset and hashVal.
+ * An existing equal key (same hash code, length and bytes) is left untouched.
+ * Returns true in both cases; a failed key-array growth only sets needSpill.
+ **/
+static inline bool appendNewKey(unsafeHashMap* hashMap, const char* keyRow,
+                                size_t keyLength, int hashVal, char* value,
+                                size_t value_size) {
+  assert(hashMap->keyArray != NULL);
+
+  const int cursor = hashMap->cursor;
+  const int mask = hashMap->arrayCapacity - 1;
+
+  int pos = hashVal & mask;
+  int step = 1;
+
+  char* base = hashMap->bytesMap;
+  const int klen = keyLength;
+  const int vlen = value_size;
+  const int recordLength = 4 + klen + vlen + 4;
+  char* record = nullptr;
+
+  int keySizeInBytes = hashMap->bytesInKeyArray;
+  char* keyArrayBase = hashMap->keyArray;
+
+  while (true) {
+    int KeyAddressOffset = *(int*)(keyArrayBase + pos * keySizeInBytes);
+    int keyHashCode = *(int*)(keyArrayBase + pos * keySizeInBytes + 4);
+
+    if (KeyAddressOffset < 0) {
+      // New key: claim this slot and reserve recordLength bytes at cursor.
+      // NOTE(review): cursor + recordLength is not checked against mapSize.
+      record = base + cursor;
+      hashMap->numKeys++;
+      *(int*)(keyArrayBase + pos * keySizeInBytes) = cursor;
+      *(int*)(keyArrayBase + pos * keySizeInBytes + 4) = hashVal;
+      hashMap->cursor += recordLength;
+      break;
+    } else if ((int)keyHashCode == hashVal) {
+      // Same hash: equal only if the stored length and all key bytes match.
+      record = base + KeyAddressOffset;
+      if ((getKeyLength(record) == klen) &&
+          (memcmp(keyRow, getKeyFromBytesMap(record), keyLength) == 0)) {
+        return true;
+      }
+    }
+
+    pos = (pos + step) & mask;
+    step++;
+  }
+
+  // copy keyRow and valueRow into hashmap
+  assert((klen & 0xff00) == 0);
+  auto total_key_length = ((8 + klen + vlen) << 16) | klen;
+  *((int*)record) = total_key_length;
+  memcpy(record + 4, keyRow, klen);
+  memcpy(record + 4 + klen, value, vlen);
+  *((int*)(record + 4 + klen + vlen)) = 0;
+
+  // See if we need to grow keyArray
+  int growthThreshold = (int)(hashMap->arrayCapacity * loadFactor);
+  if ((hashMap->numKeys > growthThreshold) &&
+      (hashMap->arrayCapacity < MAX_HASH_MAP_CAPACITY)) {
+    if (!growAndRehashKeyArray(hashMap)) hashMap->needSpill = true;
+  }
+
+  return true;
+}
\ No newline at end of file
diff --git a/native-sql-engine/cpp/src/third_party/row_wise_memory/native_memory.h b/native-sql-engine/cpp/src/third_party/row_wise_memory/native_memory.h
old mode 100755
new mode 100644
index fd3c4ee7a..395a02547
--- a/native-sql-engine/cpp/src/third_party/row_wise_memory/native_memory.h
+++ b/native-sql-engine/cpp/src/third_party/row_wise_memory/native_memory.h
@@ -1,19 +1,30 @@
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
+ *
  * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
+ * this
+ * work for additional information regarding copyright ownership.
+ * The ASF
+ * licenses this file to You under the Apache License, Version 2.0
+ * (the
+ * "License"); you may not use this file except in compliance with
+ * the
+ * License.  You may obtain a copy of the License at
  *
- *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ * Unless required by
+ * applicable law or agreed to in writing, software
+ * distributed under the
+ * License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the
+ * specific language governing permissions and
+ * limitations under the
+ * License.
+ */
 #ifndef __NATIVE_MEMORY_H
 #define __NATIVE_MEMORY_H
 
diff --git a/native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h b/native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h
index 77f233796..5eb137b4b 100644
--- a/native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h
+++ b/native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h
@@ -29,8 +29,8 @@
 #define TEMP_UNSAFEROW_BUFFER_SIZE 1024
 static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
 
-/* Unsafe Row Layout (This unsafe row only used to append all fields data as continuous
- * memory, unable to be get data from)
+/* Unsafe Row Layout (This unsafe row only used to append all fields data as
+ * continuous memory, unable to be get data from)
  *
  * | validity | col 0 | col 1 | col 2 | ...
  * explain:
diff --git a/native-sql-engine/cpp/src/third_party/ska_sort.hpp b/native-sql-engine/cpp/src/third_party/ska_sort.hpp
index 81a9ef2b7..b6c491a37 100644
--- a/native-sql-engine/cpp/src/third_party/ska_sort.hpp
+++ b/native-sql-engine/cpp/src/third_party/ska_sort.hpp
@@ -4,1442 +4,1237 @@
 
 #pragma once
 
-#include <cstdint>
 #include <algorithm>
-#include <type_traits>
+#include <cstdint>
 #include <tuple>
+#include <type_traits>
 #include <utility>
 
-namespace detail
-{
-template<typename count_type, typename It, typename OutIt, typename ExtractKey>
-void counting_sort_impl(It begin, It end, OutIt out_begin, ExtractKey && extract_key)
-{
-    count_type counts[256] = {};
-    for (It it = begin; it != end; ++it)
-    {
-        ++counts[extract_key(*it)];
-    }
-    count_type total = 0;
-    for (count_type & count : counts)
-    {
-        count_type old_count = count;
-        count = total;
-        total += old_count;
-    }
-    for (; begin != end; ++begin)
-    {
-        std::uint8_t key = extract_key(*begin);
-        out_begin[counts[key]++] = std::move(*begin);
-    }
-}
-template<typename It, typename OutIt, typename ExtractKey>
-void counting_sort_impl(It begin, It end, OutIt out_begin, ExtractKey && extract_key)
-{
-    counting_sort_impl<std::uint64_t>(begin, end, out_begin, extract_key);
-}
-inline bool to_unsigned_or_bool(bool b)
-{
-    return b;
-}
-inline unsigned char to_unsigned_or_bool(unsigned char c)
-{
-    return c;
-}
-inline unsigned char to_unsigned_or_bool(signed char c)
-{
-    return static_cast<unsigned char>(c) + 128;
-}
-inline unsigned char to_unsigned_or_bool(char c)
-{
-    return static_cast<unsigned char>(c);
-}
-inline std::uint16_t to_unsigned_or_bool(char16_t c)
-{
-    return static_cast<std::uint16_t>(c);
-}
-inline std::uint32_t to_unsigned_or_bool(char32_t c)
-{
-    return static_cast<std::uint32_t>(c);
+namespace detail {
+template <typename count_type, typename It, typename OutIt, typename ExtractKey>
+void counting_sort_impl(It begin, It end, OutIt out_begin, ExtractKey&& extract_key) {
+  count_type counts[256] = {};
+  for (It it = begin; it != end; ++it) {
+    ++counts[extract_key(*it)];
+  }
+  count_type total = 0;
+  for (count_type& count : counts) {
+    count_type old_count = count;
+    count = total;
+    total += old_count;
+  }
+  for (; begin != end; ++begin) {
+    std::uint8_t key = extract_key(*begin);
+    out_begin[counts[key]++] = std::move(*begin);
+  }
 }
-inline std::uint32_t to_unsigned_or_bool(wchar_t c)
-{
-    return static_cast<std::uint32_t>(c);
+template <typename It, typename OutIt, typename ExtractKey>
+void counting_sort_impl(It begin, It end, OutIt out_begin, ExtractKey&& extract_key) {
+  counting_sort_impl<std::uint64_t>(begin, end, out_begin, extract_key);
 }
-inline unsigned short to_unsigned_or_bool(short i)
-{
-    return static_cast<unsigned short>(i) + static_cast<unsigned short>(1 << (sizeof(short) * 8 - 1));
+inline bool to_unsigned_or_bool(bool b) { return b; }
+inline unsigned char to_unsigned_or_bool(unsigned char c) { return c; }
+inline unsigned char to_unsigned_or_bool(signed char c) {
+  return static_cast<unsigned char>(c) + 128;
 }
-inline unsigned short to_unsigned_or_bool(unsigned short i)
-{
-    return i;
+inline unsigned char to_unsigned_or_bool(char c) { return static_cast<unsigned char>(c); }
+inline std::uint16_t to_unsigned_or_bool(char16_t c) {
+  return static_cast<std::uint16_t>(c);
 }
-inline unsigned int to_unsigned_or_bool(int i)
-{
-    return static_cast<unsigned int>(i) + static_cast<unsigned int>(1 << (sizeof(int) * 8 - 1));
+inline std::uint32_t to_unsigned_or_bool(char32_t c) {
+  return static_cast<std::uint32_t>(c);
 }
-inline unsigned int to_unsigned_or_bool(unsigned int i)
-{
-    return i;
+inline std::uint32_t to_unsigned_or_bool(wchar_t c) {
+  return static_cast<std::uint32_t>(c);
 }
-inline unsigned long to_unsigned_or_bool(long l)
-{
-    return static_cast<unsigned long>(l) + static_cast<unsigned long>(1l << (sizeof(long) * 8 - 1));
+inline unsigned short to_unsigned_or_bool(short i) {
+  return static_cast<unsigned short>(i) +
+         static_cast<unsigned short>(1 << (sizeof(short) * 8 - 1));
 }
-inline unsigned long to_unsigned_or_bool(unsigned long l)
-{
-    return l;
+inline unsigned short to_unsigned_or_bool(unsigned short i) { return i; }
+inline unsigned int to_unsigned_or_bool(int i) {
+  return static_cast<unsigned int>(i) +
+         static_cast<unsigned int>(1 << (sizeof(int) * 8 - 1));
 }
-inline unsigned long long to_unsigned_or_bool(long long l)
-{
-    return static_cast<unsigned long long>(l) + static_cast<unsigned long long>(1ll << (sizeof(long long) * 8 - 1));
+inline unsigned int to_unsigned_or_bool(unsigned int i) { return i; }
+inline unsigned long to_unsigned_or_bool(long l) {
+  return static_cast<unsigned long>(l) +
+         static_cast<unsigned long>(1l << (sizeof(long) * 8 - 1));
 }
-inline unsigned long long to_unsigned_or_bool(unsigned long long l)
-{
-    return l;
+inline unsigned long to_unsigned_or_bool(unsigned long l) { return l; }
+inline unsigned long long to_unsigned_or_bool(long long l) {
+  return static_cast<unsigned long long>(l) +
+         static_cast<unsigned long long>(1ll << (sizeof(long long) * 8 - 1));
 }
-inline std::uint32_t to_unsigned_or_bool(float f)
-{
-    union
-    {
-        float f;
-        std::uint32_t u;
-    } as_union = { f };
-    std::uint32_t sign_bit = -std::int32_t(as_union.u >> 31);
-    return as_union.u ^ (sign_bit | 0x80000000);
+inline unsigned long long to_unsigned_or_bool(unsigned long long l) { return l; }
+inline std::uint32_t to_unsigned_or_bool(float f) {
+  union {
+    float f;
+    std::uint32_t u;
+  } as_union = {f};
+  std::uint32_t sign_bit = -std::int32_t(as_union.u >> 31);
+  return as_union.u ^ (sign_bit | 0x80000000);
 }
-inline std::uint64_t to_unsigned_or_bool(double f)
-{
-    union
-    {
-        double d;
-        std::uint64_t u;
-    } as_union = { f };
-    std::uint64_t sign_bit = -std::int64_t(as_union.u >> 63);
-    return as_union.u ^ (sign_bit | 0x8000000000000000);
+inline std::uint64_t to_unsigned_or_bool(double f) {
+  union {
+    double d;
+    std::uint64_t u;
+  } as_union = {f};
+  std::uint64_t sign_bit = -std::int64_t(as_union.u >> 63);
+  return as_union.u ^ (sign_bit | 0x8000000000000000);
 }
-template<typename T>
-inline size_t to_unsigned_or_bool(T * ptr)
-{
-    return reinterpret_cast<size_t>(ptr);
+template <typename T>
+inline size_t to_unsigned_or_bool(T* ptr) {
+  return reinterpret_cast<size_t>(ptr);
 }
 
-template<size_t>
+template <size_t>
 struct SizedRadixSorter;
 
-template<>
-struct SizedRadixSorter<1>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        counting_sort_impl(begin, end, buffer_begin, [&](auto && o)
-        {
-            return to_unsigned_or_bool(extract_key(o));
-        });
-        return true;
-    }
+template <>
+struct SizedRadixSorter<1> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    counting_sort_impl(begin, end, buffer_begin,
+                       [&](auto&& o) { return to_unsigned_or_bool(extract_key(o)); });
+    return true;
+  }
 
-    static constexpr size_t pass_count = 2;
+  static constexpr size_t pass_count = 2;
 };
-template<>
-struct SizedRadixSorter<2>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        std::ptrdiff_t num_elements = end - begin;
-        if (num_elements <= (1ll << 32))
-            return sort_inline<uint32_t>(begin, end, buffer_begin, buffer_begin + num_elements, extract_key);
-        else
-            return sort_inline<uint64_t>(begin, end, buffer_begin, buffer_begin + num_elements, extract_key);
+template <>
+struct SizedRadixSorter<2> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    std::ptrdiff_t num_elements = end - begin;
+    if (num_elements <= (1ll << 32))
+      return sort_inline<uint32_t>(begin, end, buffer_begin, buffer_begin + num_elements,
+                                   extract_key);
+    else
+      return sort_inline<uint64_t>(begin, end, buffer_begin, buffer_begin + num_elements,
+                                   extract_key);
+  }
+
+  template <typename count_type, typename It, typename OutIt, typename ExtractKey>
+  static bool sort_inline(It begin, It end, OutIt out_begin, OutIt out_end,
+                          ExtractKey&& extract_key) {
+    count_type counts0[256] = {};
+    count_type counts1[256] = {};
+
+    for (It it = begin; it != end; ++it) {
+      uint16_t key = to_unsigned_or_bool(extract_key(*it));
+      ++counts0[key & 0xff];
+      ++counts1[(key >> 8) & 0xff];
     }
-
-    template<typename count_type, typename It, typename OutIt, typename ExtractKey>
-    static bool sort_inline(It begin, It end, OutIt out_begin, OutIt out_end, ExtractKey && extract_key)
-    {
-        count_type counts0[256] = {};
-        count_type counts1[256] = {};
-
-        for (It it = begin; it != end; ++it)
-        {
-            uint16_t key = to_unsigned_or_bool(extract_key(*it));
-            ++counts0[key & 0xff];
-            ++counts1[(key >> 8) & 0xff];
-        }
-        count_type total0 = 0;
-        count_type total1 = 0;
-        for (int i = 0; i < 256; ++i)
-        {
-            count_type old_count0 = counts0[i];
-            count_type old_count1 = counts1[i];
-            counts0[i] = total0;
-            counts1[i] = total1;
-            total0 += old_count0;
-            total1 += old_count1;
-        }
-        for (It it = begin; it != end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it));
-            out_begin[counts0[key]++] = std::move(*it);
-        }
-        for (OutIt it = out_begin; it != out_end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 8;
-            begin[counts1[key]++] = std::move(*it);
-        }
-        return false;
+    count_type total0 = 0;
+    count_type total1 = 0;
+    for (int i = 0; i < 256; ++i) {
+      count_type old_count0 = counts0[i];
+      count_type old_count1 = counts1[i];
+      counts0[i] = total0;
+      counts1[i] = total1;
+      total0 += old_count0;
+      total1 += old_count1;
+    }
+    for (It it = begin; it != end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it));
+      out_begin[counts0[key]++] = std::move(*it);
     }
+    for (OutIt it = out_begin; it != out_end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 8;
+      begin[counts1[key]++] = std::move(*it);
+    }
+    return false;
+  }
 
-    static constexpr size_t pass_count = 3;
+  static constexpr size_t pass_count = 3;
 };
-template<>
-struct SizedRadixSorter<4>
-{
-
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        std::ptrdiff_t num_elements = end - begin;
-        if (num_elements <= (1ll << 32))
-            return sort_inline<uint32_t>(begin, end, buffer_begin, buffer_begin + num_elements, extract_key);
-        else
-            return sort_inline<uint64_t>(begin, end, buffer_begin, buffer_begin + num_elements, extract_key);
+template <>
+struct SizedRadixSorter<4> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    std::ptrdiff_t num_elements = end - begin;
+    if (num_elements <= (1ll << 32))
+      return sort_inline<uint32_t>(begin, end, buffer_begin, buffer_begin + num_elements,
+                                   extract_key);
+    else
+      return sort_inline<uint64_t>(begin, end, buffer_begin, buffer_begin + num_elements,
+                                   extract_key);
+  }
+  template <typename count_type, typename It, typename OutIt, typename ExtractKey>
+  static bool sort_inline(It begin, It end, OutIt out_begin, OutIt out_end,
+                          ExtractKey&& extract_key) {
+    count_type counts0[256] = {};
+    count_type counts1[256] = {};
+    count_type counts2[256] = {};
+    count_type counts3[256] = {};
+
+    for (It it = begin; it != end; ++it) {
+      uint32_t key = to_unsigned_or_bool(extract_key(*it));
+      ++counts0[key & 0xff];
+      ++counts1[(key >> 8) & 0xff];
+      ++counts2[(key >> 16) & 0xff];
+      ++counts3[(key >> 24) & 0xff];
     }
-    template<typename count_type, typename It, typename OutIt, typename ExtractKey>
-    static bool sort_inline(It begin, It end, OutIt out_begin, OutIt out_end, ExtractKey && extract_key)
-    {
-        count_type counts0[256] = {};
-        count_type counts1[256] = {};
-        count_type counts2[256] = {};
-        count_type counts3[256] = {};
-
-        for (It it = begin; it != end; ++it)
-        {
-            uint32_t key = to_unsigned_or_bool(extract_key(*it));
-            ++counts0[key & 0xff];
-            ++counts1[(key >> 8) & 0xff];
-            ++counts2[(key >> 16) & 0xff];
-            ++counts3[(key >> 24) & 0xff];
-        }
-        count_type total0 = 0;
-        count_type total1 = 0;
-        count_type total2 = 0;
-        count_type total3 = 0;
-        for (int i = 0; i < 256; ++i)
-        {
-            count_type old_count0 = counts0[i];
-            count_type old_count1 = counts1[i];
-            count_type old_count2 = counts2[i];
-            count_type old_count3 = counts3[i];
-            counts0[i] = total0;
-            counts1[i] = total1;
-            counts2[i] = total2;
-            counts3[i] = total3;
-            total0 += old_count0;
-            total1 += old_count1;
-            total2 += old_count2;
-            total3 += old_count3;
-        }
-        for (It it = begin; it != end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it));
-            out_begin[counts0[key]++] = std::move(*it);
-        }
-        for (OutIt it = out_begin; it != out_end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 8;
-            begin[counts1[key]++] = std::move(*it);
-        }
-        for (It it = begin; it != end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 16;
-            out_begin[counts2[key]++] = std::move(*it);
-        }
-        for (OutIt it = out_begin; it != out_end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 24;
-            begin[counts3[key]++] = std::move(*it);
-        }
-        return false;
+    count_type total0 = 0;
+    count_type total1 = 0;
+    count_type total2 = 0;
+    count_type total3 = 0;
+    for (int i = 0; i < 256; ++i) {
+      count_type old_count0 = counts0[i];
+      count_type old_count1 = counts1[i];
+      count_type old_count2 = counts2[i];
+      count_type old_count3 = counts3[i];
+      counts0[i] = total0;
+      counts1[i] = total1;
+      counts2[i] = total2;
+      counts3[i] = total3;
+      total0 += old_count0;
+      total1 += old_count1;
+      total2 += old_count2;
+      total3 += old_count3;
+    }
+    for (It it = begin; it != end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it));
+      out_begin[counts0[key]++] = std::move(*it);
     }
+    for (OutIt it = out_begin; it != out_end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 8;
+      begin[counts1[key]++] = std::move(*it);
+    }
+    for (It it = begin; it != end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 16;
+      out_begin[counts2[key]++] = std::move(*it);
+    }
+    for (OutIt it = out_begin; it != out_end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 24;
+      begin[counts3[key]++] = std::move(*it);
+    }
+    return false;
+  }
 
-    static constexpr size_t pass_count = 5;
+  static constexpr size_t pass_count = 5;
 };
-template<>
-struct SizedRadixSorter<8>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        std::ptrdiff_t num_elements = end - begin;
-        if (num_elements <= (1ll << 32))
-            return sort_inline<uint32_t>(begin, end, buffer_begin, buffer_begin + num_elements, extract_key);
-        else
-            return sort_inline<uint64_t>(begin, end, buffer_begin, buffer_begin + num_elements, extract_key);
+template <>
+struct SizedRadixSorter<8> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    std::ptrdiff_t num_elements = end - begin;
+    if (num_elements <= (1ll << 32))
+      return sort_inline<uint32_t>(begin, end, buffer_begin, buffer_begin + num_elements,
+                                   extract_key);
+    else
+      return sort_inline<uint64_t>(begin, end, buffer_begin, buffer_begin + num_elements,
+                                   extract_key);
+  }
+  template <typename count_type, typename It, typename OutIt, typename ExtractKey>
+  static bool sort_inline(It begin, It end, OutIt out_begin, OutIt out_end,
+                          ExtractKey&& extract_key) {
+    count_type counts0[256] = {};
+    count_type counts1[256] = {};
+    count_type counts2[256] = {};
+    count_type counts3[256] = {};
+    count_type counts4[256] = {};
+    count_type counts5[256] = {};
+    count_type counts6[256] = {};
+    count_type counts7[256] = {};
+
+    for (It it = begin; it != end; ++it) {
+      uint64_t key = to_unsigned_or_bool(extract_key(*it));
+      ++counts0[key & 0xff];
+      ++counts1[(key >> 8) & 0xff];
+      ++counts2[(key >> 16) & 0xff];
+      ++counts3[(key >> 24) & 0xff];
+      ++counts4[(key >> 32) & 0xff];
+      ++counts5[(key >> 40) & 0xff];
+      ++counts6[(key >> 48) & 0xff];
+      ++counts7[(key >> 56) & 0xff];
     }
-    template<typename count_type, typename It, typename OutIt, typename ExtractKey>
-    static bool sort_inline(It begin, It end, OutIt out_begin, OutIt out_end, ExtractKey && extract_key)
-    {
-        count_type counts0[256] = {};
-        count_type counts1[256] = {};
-        count_type counts2[256] = {};
-        count_type counts3[256] = {};
-        count_type counts4[256] = {};
-        count_type counts5[256] = {};
-        count_type counts6[256] = {};
-        count_type counts7[256] = {};
-
-        for (It it = begin; it != end; ++it)
-        {
-            uint64_t key = to_unsigned_or_bool(extract_key(*it));
-            ++counts0[key & 0xff];
-            ++counts1[(key >> 8) & 0xff];
-            ++counts2[(key >> 16) & 0xff];
-            ++counts3[(key >> 24) & 0xff];
-            ++counts4[(key >> 32) & 0xff];
-            ++counts5[(key >> 40) & 0xff];
-            ++counts6[(key >> 48) & 0xff];
-            ++counts7[(key >> 56) & 0xff];
-        }
-        count_type total0 = 0;
-        count_type total1 = 0;
-        count_type total2 = 0;
-        count_type total3 = 0;
-        count_type total4 = 0;
-        count_type total5 = 0;
-        count_type total6 = 0;
-        count_type total7 = 0;
-        for (int i = 0; i < 256; ++i)
-        {
-            count_type old_count0 = counts0[i];
-            count_type old_count1 = counts1[i];
-            count_type old_count2 = counts2[i];
-            count_type old_count3 = counts3[i];
-            count_type old_count4 = counts4[i];
-            count_type old_count5 = counts5[i];
-            count_type old_count6 = counts6[i];
-            count_type old_count7 = counts7[i];
-            counts0[i] = total0;
-            counts1[i] = total1;
-            counts2[i] = total2;
-            counts3[i] = total3;
-            counts4[i] = total4;
-            counts5[i] = total5;
-            counts6[i] = total6;
-            counts7[i] = total7;
-            total0 += old_count0;
-            total1 += old_count1;
-            total2 += old_count2;
-            total3 += old_count3;
-            total4 += old_count4;
-            total5 += old_count5;
-            total6 += old_count6;
-            total7 += old_count7;
-        }
-        for (It it = begin; it != end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it));
-            out_begin[counts0[key]++] = std::move(*it);
-        }
-        for (OutIt it = out_begin; it != out_end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 8;
-            begin[counts1[key]++] = std::move(*it);
-        }
-        for (It it = begin; it != end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 16;
-            out_begin[counts2[key]++] = std::move(*it);
-        }
-        for (OutIt it = out_begin; it != out_end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 24;
-            begin[counts3[key]++] = std::move(*it);
-        }
-        for (It it = begin; it != end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 32;
-            out_begin[counts4[key]++] = std::move(*it);
-        }
-        for (OutIt it = out_begin; it != out_end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 40;
-            begin[counts5[key]++] = std::move(*it);
-        }
-        for (It it = begin; it != end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 48;
-            out_begin[counts6[key]++] = std::move(*it);
-        }
-        for (OutIt it = out_begin; it != out_end; ++it)
-        {
-            std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 56;
-            begin[counts7[key]++] = std::move(*it);
-        }
-        return false;
+    count_type total0 = 0;
+    count_type total1 = 0;
+    count_type total2 = 0;
+    count_type total3 = 0;
+    count_type total4 = 0;
+    count_type total5 = 0;
+    count_type total6 = 0;
+    count_type total7 = 0;
+    for (int i = 0; i < 256; ++i) {
+      count_type old_count0 = counts0[i];
+      count_type old_count1 = counts1[i];
+      count_type old_count2 = counts2[i];
+      count_type old_count3 = counts3[i];
+      count_type old_count4 = counts4[i];
+      count_type old_count5 = counts5[i];
+      count_type old_count6 = counts6[i];
+      count_type old_count7 = counts7[i];
+      counts0[i] = total0;
+      counts1[i] = total1;
+      counts2[i] = total2;
+      counts3[i] = total3;
+      counts4[i] = total4;
+      counts5[i] = total5;
+      counts6[i] = total6;
+      counts7[i] = total7;
+      total0 += old_count0;
+      total1 += old_count1;
+      total2 += old_count2;
+      total3 += old_count3;
+      total4 += old_count4;
+      total5 += old_count5;
+      total6 += old_count6;
+      total7 += old_count7;
+    }
+    for (It it = begin; it != end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it));
+      out_begin[counts0[key]++] = std::move(*it);
+    }
+    for (OutIt it = out_begin; it != out_end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 8;
+      begin[counts1[key]++] = std::move(*it);
+    }
+    for (It it = begin; it != end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 16;
+      out_begin[counts2[key]++] = std::move(*it);
+    }
+    for (OutIt it = out_begin; it != out_end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 24;
+      begin[counts3[key]++] = std::move(*it);
+    }
+    for (It it = begin; it != end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 32;
+      out_begin[counts4[key]++] = std::move(*it);
     }
+    for (OutIt it = out_begin; it != out_end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 40;
+      begin[counts5[key]++] = std::move(*it);
+    }
+    for (It it = begin; it != end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 48;
+      out_begin[counts6[key]++] = std::move(*it);
+    }
+    for (OutIt it = out_begin; it != out_end; ++it) {
+      std::uint8_t key = to_unsigned_or_bool(extract_key(*it)) >> 56;
+      begin[counts7[key]++] = std::move(*it);
+    }
+    return false;
+  }
 
-    static constexpr size_t pass_count = 9;
+  static constexpr size_t pass_count = 9;
 };
 
-template<typename>
+template <typename>
 struct RadixSorter;
-template<>
-struct RadixSorter<bool>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        size_t false_count = 0;
-        for (It it = begin; it != end; ++it)
-        {
-            if (!extract_key(*it))
-                ++false_count;
-        }
-        size_t true_position = false_count;
-        false_count = 0;
-        for (; begin != end; ++begin)
-        {
-            if (extract_key(*begin))
-                buffer_begin[true_position++] = std::move(*begin);
-            else
-                buffer_begin[false_count++] = std::move(*begin);
-        }
-        return true;
+template <>
+struct RadixSorter<bool> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    size_t false_count = 0;
+    for (It it = begin; it != end; ++it) {
+      if (!extract_key(*it)) ++false_count;
     }
-
-    static constexpr size_t pass_count = 2;
-};
-template<>
-struct RadixSorter<signed char> : SizedRadixSorter<sizeof(signed char)>
-{
-};
-template<>
-struct RadixSorter<unsigned char> : SizedRadixSorter<sizeof(unsigned char)>
-{
-};
-template<>
-struct RadixSorter<signed short> : SizedRadixSorter<sizeof(signed short)>
-{
-};
-template<>
-struct RadixSorter<unsigned short> : SizedRadixSorter<sizeof(unsigned short)>
-{
-};
-template<>
-struct RadixSorter<signed int> : SizedRadixSorter<sizeof(signed int)>
-{
-};
-template<>
-struct RadixSorter<unsigned int> : SizedRadixSorter<sizeof(unsigned int)>
-{
-};
-template<>
-struct RadixSorter<signed long> : SizedRadixSorter<sizeof(signed long)>
-{
-};
-template<>
-struct RadixSorter<unsigned long> : SizedRadixSorter<sizeof(unsigned long)>
-{
-};
-template<>
-struct RadixSorter<signed long long> : SizedRadixSorter<sizeof(signed long long)>
-{
-};
-template<>
-struct RadixSorter<unsigned long long> : SizedRadixSorter<sizeof(unsigned long long)>
-{
-};
-template<>
-struct RadixSorter<float> : SizedRadixSorter<sizeof(float)>
-{
-};
-template<>
-struct RadixSorter<double> : SizedRadixSorter<sizeof(double)>
-{
-};
-template<>
-struct RadixSorter<char> : SizedRadixSorter<sizeof(char)>
-{
-};
-template<>
-struct RadixSorter<wchar_t> : SizedRadixSorter<sizeof(wchar_t)>
-{
-};
-template<>
-struct RadixSorter<char16_t> : SizedRadixSorter<sizeof(char16_t)>
-{
-};
-template<>
-struct RadixSorter<char32_t> : SizedRadixSorter<sizeof(char32_t)>
-{
-};
-template<typename K, typename V>
-struct RadixSorter<std::pair<K, V>>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        bool first_result = RadixSorter<V>::sort(begin, end, buffer_begin, [&](auto && o)
-        {
-            return extract_key(o).second;
-        });
-        auto extract_first = [&](auto && o)
-        {
-            return extract_key(o).first;
-        };
-
-        if (first_result)
-        {
-            return !RadixSorter<K>::sort(buffer_begin, buffer_begin + (end - begin), begin, extract_first);
-        }
-        else
-        {
-            return RadixSorter<K>::sort(begin, end, buffer_begin, extract_first);
-        }
+    size_t true_position = false_count;
+    false_count = 0;
+    for (; begin != end; ++begin) {
+      if (extract_key(*begin))
+        buffer_begin[true_position++] = std::move(*begin);
+      else
+        buffer_begin[false_count++] = std::move(*begin);
     }
-
-    static constexpr size_t pass_count = RadixSorter<K>::pass_count + RadixSorter<V>::pass_count;
-};
-template<typename K, typename V>
-struct RadixSorter<const std::pair<K, V> &>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        bool first_result = RadixSorter<V>::sort(begin, end, buffer_begin, [&](auto && o) -> const V &
-        {
-            return extract_key(o).second;
-        });
-        auto extract_first = [&](auto && o) -> const K &
-        {
-            return extract_key(o).first;
-        };
-
-        if (first_result)
-        {
-            return !RadixSorter<K>::sort(buffer_begin, buffer_begin + (end - begin), begin, extract_first);
-        }
-        else
-        {
-            return RadixSorter<K>::sort(begin, end, buffer_begin, extract_first);
-        }
+    return true;
+  }
+
+  static constexpr size_t pass_count = 2;
+};
+template <>
+struct RadixSorter<signed char> : SizedRadixSorter<sizeof(signed char)> {};
+template <>
+struct RadixSorter<unsigned char> : SizedRadixSorter<sizeof(unsigned char)> {};
+template <>
+struct RadixSorter<signed short> : SizedRadixSorter<sizeof(signed short)> {};
+template <>
+struct RadixSorter<unsigned short> : SizedRadixSorter<sizeof(unsigned short)> {};
+template <>
+struct RadixSorter<signed int> : SizedRadixSorter<sizeof(signed int)> {};
+template <>
+struct RadixSorter<unsigned int> : SizedRadixSorter<sizeof(unsigned int)> {};
+template <>
+struct RadixSorter<signed long> : SizedRadixSorter<sizeof(signed long)> {};
+template <>
+struct RadixSorter<unsigned long> : SizedRadixSorter<sizeof(unsigned long)> {};
+template <>
+struct RadixSorter<signed long long> : SizedRadixSorter<sizeof(signed long long)> {};
+template <>
+struct RadixSorter<unsigned long long> : SizedRadixSorter<sizeof(unsigned long long)> {};
+template <>
+struct RadixSorter<float> : SizedRadixSorter<sizeof(float)> {};
+template <>
+struct RadixSorter<double> : SizedRadixSorter<sizeof(double)> {};
+template <>
+struct RadixSorter<char> : SizedRadixSorter<sizeof(char)> {};
+template <>
+struct RadixSorter<wchar_t> : SizedRadixSorter<sizeof(wchar_t)> {};
+template <>
+struct RadixSorter<char16_t> : SizedRadixSorter<sizeof(char16_t)> {};
+template <>
+struct RadixSorter<char32_t> : SizedRadixSorter<sizeof(char32_t)> {};
+template <typename K, typename V>
+struct RadixSorter<std::pair<K, V>> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    bool first_result = RadixSorter<V>::sort(
+        begin, end, buffer_begin, [&](auto&& o) { return extract_key(o).second; });
+    auto extract_first = [&](auto&& o) { return extract_key(o).first; };
+
+    if (first_result) {
+      return !RadixSorter<K>::sort(buffer_begin, buffer_begin + (end - begin), begin,
+                                   extract_first);
+    } else {
+      return RadixSorter<K>::sort(begin, end, buffer_begin, extract_first);
     }
-
-    static constexpr size_t pass_count = RadixSorter<K>::pass_count + RadixSorter<V>::pass_count;
-};
-template<size_t I, size_t S, typename Tuple>
-struct TupleRadixSorter
-{
-    using NextSorter = TupleRadixSorter<I + 1, S, Tuple>;
-    using ThisSorter = RadixSorter<typename std::tuple_element<I, Tuple>::type>;
-
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt out_begin, OutIt out_end, ExtractKey && extract_key)
-    {
-        bool which = NextSorter::sort(begin, end, out_begin, out_end, extract_key);
-        auto extract_i = [&](auto && o)
-        {
-            return std::get<I>(extract_key(o));
-        };
-        if (which)
-            return !ThisSorter::sort(out_begin, out_end, begin, extract_i);
-        else
-            return ThisSorter::sort(begin, end, out_begin, extract_i);
-    }
-
-    static constexpr size_t pass_count = ThisSorter::pass_count + NextSorter::pass_count;
-};
-template<size_t I, size_t S, typename Tuple>
-struct TupleRadixSorter<I, S, const Tuple &>
-{
-    using NextSorter = TupleRadixSorter<I + 1, S, const Tuple &>;
-    using ThisSorter = RadixSorter<typename std::tuple_element<I, Tuple>::type>;
-
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt out_begin, OutIt out_end, ExtractKey && extract_key)
-    {
-        bool which = NextSorter::sort(begin, end, out_begin, out_end, extract_key);
-        auto extract_i = [&](auto && o) -> decltype(auto)
-        {
-            return std::get<I>(extract_key(o));
-        };
-        if (which)
-            return !ThisSorter::sort(out_begin, out_end, begin, extract_i);
-        else
-            return ThisSorter::sort(begin, end, out_begin, extract_i);
+  }
+
+  static constexpr size_t pass_count =
+      RadixSorter<K>::pass_count + RadixSorter<V>::pass_count;
+};
+template <typename K, typename V>
+struct RadixSorter<const std::pair<K, V>&> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    bool first_result =
+        RadixSorter<V>::sort(begin, end, buffer_begin,
+                             [&](auto&& o) -> const V& { return extract_key(o).second; });
+    auto extract_first = [&](auto&& o) -> const K& { return extract_key(o).first; };
+
+    if (first_result) {
+      return !RadixSorter<K>::sort(buffer_begin, buffer_begin + (end - begin), begin,
+                                   extract_first);
+    } else {
+      return RadixSorter<K>::sort(begin, end, buffer_begin, extract_first);
     }
+  }
+
+  static constexpr size_t pass_count =
+      RadixSorter<K>::pass_count + RadixSorter<V>::pass_count;
+};
+template <size_t I, size_t S, typename Tuple>
+struct TupleRadixSorter {
+  using NextSorter = TupleRadixSorter<I + 1, S, Tuple>;
+  using ThisSorter = RadixSorter<typename std::tuple_element<I, Tuple>::type>;
+
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt out_begin, OutIt out_end,
+                   ExtractKey&& extract_key) {
+    bool which = NextSorter::sort(begin, end, out_begin, out_end, extract_key);
+    auto extract_i = [&](auto&& o) { return std::get<I>(extract_key(o)); };
+    if (which)
+      return !ThisSorter::sort(out_begin, out_end, begin, extract_i);
+    else
+      return ThisSorter::sort(begin, end, out_begin, extract_i);
+  }
+
+  static constexpr size_t pass_count = ThisSorter::pass_count + NextSorter::pass_count;
+};
+template <size_t I, size_t S, typename Tuple>
+struct TupleRadixSorter<I, S, const Tuple&> {
+  using NextSorter = TupleRadixSorter<I + 1, S, const Tuple&>;
+  using ThisSorter = RadixSorter<typename std::tuple_element<I, Tuple>::type>;
+
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt out_begin, OutIt out_end,
+                   ExtractKey&& extract_key) {
+    bool which = NextSorter::sort(begin, end, out_begin, out_end, extract_key);
+    auto extract_i = [&](auto&& o) -> decltype(auto) {
+      return std::get<I>(extract_key(o));
+    };
+    if (which)
+      return !ThisSorter::sort(out_begin, out_end, begin, extract_i);
+    else
+      return ThisSorter::sort(begin, end, out_begin, extract_i);
+  }
 
-    static constexpr size_t pass_count = ThisSorter::pass_count + NextSorter::pass_count;
+  static constexpr size_t pass_count = ThisSorter::pass_count + NextSorter::pass_count;
 };
-template<size_t I, typename Tuple>
-struct TupleRadixSorter<I, I, Tuple>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It, It, OutIt, OutIt, ExtractKey &&)
-    {
-        return false;
-    }
+template <size_t I, typename Tuple>
+struct TupleRadixSorter<I, I, Tuple> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It, It, OutIt, OutIt, ExtractKey&&) {
+    return false;
+  }
 
-    static constexpr size_t pass_count = 0;
+  static constexpr size_t pass_count = 0;
 };
-template<size_t I, typename Tuple>
-struct TupleRadixSorter<I, I, const Tuple &>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It, It, OutIt, OutIt, ExtractKey &&)
-    {
-        return false;
-    }
+template <size_t I, typename Tuple>
+struct TupleRadixSorter<I, I, const Tuple&> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It, It, OutIt, OutIt, ExtractKey&&) {
+    return false;
+  }
 
-    static constexpr size_t pass_count = 0;
+  static constexpr size_t pass_count = 0;
 };
 
-template<typename... Args>
-struct RadixSorter<std::tuple<Args...>>
-{
-    using SorterImpl = TupleRadixSorter<0, sizeof...(Args), std::tuple<Args...>>;
+template <typename... Args>
+struct RadixSorter<std::tuple<Args...>> {
+  using SorterImpl = TupleRadixSorter<0, sizeof...(Args), std::tuple<Args...>>;
 
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        return SorterImpl::sort(begin, end, buffer_begin, buffer_begin + (end - begin), extract_key);
-    }
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    return SorterImpl::sort(begin, end, buffer_begin, buffer_begin + (end - begin),
+                            extract_key);
+  }
 
-    static constexpr size_t pass_count = SorterImpl::pass_count;
+  static constexpr size_t pass_count = SorterImpl::pass_count;
 };
 
-template<typename... Args>
-struct RadixSorter<const std::tuple<Args...> &>
-{
-    using SorterImpl = TupleRadixSorter<0, sizeof...(Args), const std::tuple<Args...> &>;
+template <typename... Args>
+struct RadixSorter<const std::tuple<Args...>&> {
+  using SorterImpl = TupleRadixSorter<0, sizeof...(Args), const std::tuple<Args...>&>;
 
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        return SorterImpl::sort(begin, end, buffer_begin, buffer_begin + (end - begin), extract_key);
-    }
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    return SorterImpl::sort(begin, end, buffer_begin, buffer_begin + (end - begin),
+                            extract_key);
+  }
 
-    static constexpr size_t pass_count = SorterImpl::pass_count;
+  static constexpr size_t pass_count = SorterImpl::pass_count;
 };
 
-template<typename T, size_t S>
-struct RadixSorter<std::array<T, S>>
-{
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        auto buffer_end = buffer_begin + (end - begin);
-        bool which = false;
-        for (size_t i = S; i > 0; --i)
-        {
-            auto extract_i = [&, i = i - 1](auto && o)
-            {
-                return extract_key(o)[i];
-            };
-            if (which)
-                which = !RadixSorter<T>::sort(buffer_begin, buffer_end, begin, extract_i);
-            else
-                which = RadixSorter<T>::sort(begin, end, buffer_begin, extract_i);
-        }
-        return which;
+template <typename T, size_t S>
+struct RadixSorter<std::array<T, S>> {
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    auto buffer_end = buffer_begin + (end - begin);
+    bool which = false;
+    for (size_t i = S; i > 0; --i) {
+      auto extract_i = [&, i = i - 1](auto&& o) { return extract_key(o)[i]; };
+      if (which)
+        which = !RadixSorter<T>::sort(buffer_begin, buffer_end, begin, extract_i);
+      else
+        which = RadixSorter<T>::sort(begin, end, buffer_begin, extract_i);
     }
-
-    static constexpr size_t pass_count = RadixSorter<T>::pass_count * S;
-};
-
-template<typename T>
-struct RadixSorter<const T> : RadixSorter<T>
-{
-};
-template<typename T>
-struct RadixSorter<T &> : RadixSorter<const T &>
-{
-};
-template<typename T>
-struct RadixSorter<T &&> : RadixSorter<T>
-{
-};
-template<typename T>
-struct RadixSorter<const T &> : RadixSorter<T>
-{
-};
-template<typename T>
-struct RadixSorter<const T &&> : RadixSorter<T>
-{
-};
+    return which;
+  }
+
+  static constexpr size_t pass_count = RadixSorter<T>::pass_count * S;
+};
+
+template <typename T>
+struct RadixSorter<const T> : RadixSorter<T> {};
+template <typename T>
+struct RadixSorter<T&> : RadixSorter<const T&> {};
+template <typename T>
+struct RadixSorter<T&&> : RadixSorter<T> {};
+template <typename T>
+struct RadixSorter<const T&> : RadixSorter<T> {};
+template <typename T>
+struct RadixSorter<const T&&> : RadixSorter<T> {};
 // these structs serve two purposes
-// 1. they serve as illustration for how to implement the to_radix_sort_key function
+// 1. they serve as illustration for how to implement the to_radix_sort_key
+// function
 // 2. they help produce better error messages. with these overloads you get the
 //    error message "no matching function for call to to_radix_sort(your_type)"
 //    without these examples, you'd get the error message "to_radix_sort_key was
 //    not declared in this scope" which is a much less useful error message
-struct ExampleStructA { int i; };
-struct ExampleStructB { float f; };
+struct ExampleStructA {
+  int i;
+};
+struct ExampleStructB {
+  float f;
+};
 inline int to_radix_sort_key(ExampleStructA a) { return a.i; }
 inline float to_radix_sort_key(ExampleStructB b) { return b.f; }
-template<typename T, typename Enable = void>
-struct FallbackRadixSorter : RadixSorter<decltype(to_radix_sort_key(std::declval<T>()))>
-{
-    using base = RadixSorter<decltype(to_radix_sort_key(std::declval<T>()))>;
-
-    template<typename It, typename OutIt, typename ExtractKey>
-    static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey && extract_key)
-    {
-        return base::sort(begin, end, buffer_begin, [&](auto && a) -> decltype(auto)
-        {
-            return to_radix_sort_key(extract_key(a));
-        });
-    }
+template <typename T, typename Enable = void>
+struct FallbackRadixSorter : RadixSorter<decltype(to_radix_sort_key(std::declval<T>()))> {
+  using base = RadixSorter<decltype(to_radix_sort_key(std::declval<T>()))>;
+
+  template <typename It, typename OutIt, typename ExtractKey>
+  static bool sort(It begin, It end, OutIt buffer_begin, ExtractKey&& extract_key) {
+    return base::sort(begin, end, buffer_begin, [&](auto&& a) -> decltype(auto) {
+      return to_radix_sort_key(extract_key(a));
+    });
+  }
 };
 
-template<typename...>
-struct nested_void
-{
-	using type = void;
+template <typename...>
+struct nested_void {
+  using type = void;
 };
 
-template<typename... Args>
+template <typename... Args>
 using void_t = typename nested_void<Args...>::type;
 
-template<typename T>
-struct has_subscript_operator_impl
-{
-	template<typename U, typename = decltype(std::declval<U>()[0])>
-	static std::true_type test(int);
-	template<typename>
-	static std::false_type test(...);
+template <typename T>
+struct has_subscript_operator_impl {
+  template <typename U, typename = decltype(std::declval<U>()[0])>
+  static std::true_type test(int);
+  template <typename>
+  static std::false_type test(...);
 
-	using type = decltype(test<T>(0));
+  using type = decltype(test<T>(0));
 };
 
-template<typename T>
+template <typename T>
 using has_subscript_operator = typename has_subscript_operator_impl<T>::type;
 
-
-template<typename T>
+template <typename T>
 struct FallbackRadixSorter<T, void_t<decltype(to_unsigned_or_bool(std::declval<T>()))>>
-    : RadixSorter<decltype(to_unsigned_or_bool(std::declval<T>()))>
-{
-};
+    : RadixSorter<decltype(to_unsigned_or_bool(std::declval<T>()))> {};
 
-template<typename T>
-struct RadixSorter : FallbackRadixSorter<T>
-{
-};
+template <typename T>
+struct RadixSorter : FallbackRadixSorter<T> {};
 
-template<typename T>
+template <typename T>
 size_t radix_sort_pass_count = RadixSorter<T>::pass_count;
 
-template<typename It, typename Func>
-inline void unroll_loop_four_times(It begin, size_t iteration_count, Func && to_call)
-{
-    size_t loop_count = iteration_count / 4;
-    size_t remainder_count = iteration_count - loop_count * 4;
-    for (; loop_count > 0; --loop_count)
-    {
-        to_call(begin);
-        ++begin;
-        to_call(begin);
-        ++begin;
-        to_call(begin);
-        ++begin;
-        to_call(begin);
-        ++begin;
-    }
-    switch(remainder_count)
-    {
+template <typename It, typename Func>
+inline void unroll_loop_four_times(It begin, size_t iteration_count, Func&& to_call) {
+  size_t loop_count = iteration_count / 4;
+  size_t remainder_count = iteration_count - loop_count * 4;
+  for (; loop_count > 0; --loop_count) {
+    to_call(begin);
+    ++begin;
+    to_call(begin);
+    ++begin;
+    to_call(begin);
+    ++begin;
+    to_call(begin);
+    ++begin;
+  }
+  switch (remainder_count) {
     case 3:
-        to_call(begin);
-        ++begin;
+      to_call(begin);
+      ++begin;
     case 2:
-        to_call(begin);
-        ++begin;
+      to_call(begin);
+      ++begin;
     case 1:
-        to_call(begin);
-    }
+      to_call(begin);
+  }
 }
 
-template<typename It, typename F>
-inline It custom_std_partition(It begin, It end, F && func)
-{
-    for (;; ++begin)
-    {
-        if (begin == end)
-            return end;
-        if (!func(*begin))
-            break;
-    }
-    It it = begin;
-    for(++it; it != end; ++it)
-    {
-        if (!func(*it))
-            continue;
-
-        std::iter_swap(begin, it);
-        ++begin;
-    }
-    return begin;
+template <typename It, typename F>
+inline It custom_std_partition(It begin, It end, F&& func) {
+  for (;; ++begin) {
+    if (begin == end) return end;
+    if (!func(*begin)) break;
+  }
+  It it = begin;
+  for (++it; it != end; ++it) {
+    if (!func(*it)) continue;
+
+    std::iter_swap(begin, it);
+    ++begin;
+  }
+  return begin;
 }
 
-struct PartitionInfo
-{
-    PartitionInfo()
-        : count(0)
-    {
-    }
+struct PartitionInfo {
+  PartitionInfo() : count(0) {}
 
-    union
-    {
-        size_t count;
-        size_t offset;
-    };
-    size_t next_offset;
+  union {
+    size_t count;
+    size_t offset;
+  };
+  size_t next_offset;
 };
 
-template<size_t>
+template <size_t>
 struct UnsignedForSize;
-template<>
-struct UnsignedForSize<1>
-{
-    typedef uint8_t type;
+template <>
+struct UnsignedForSize<1> {
+  typedef uint8_t type;
 };
-template<>
-struct UnsignedForSize<2>
-{
-    typedef uint16_t type;
+template <>
+struct UnsignedForSize<2> {
+  typedef uint16_t type;
 };
-template<>
-struct UnsignedForSize<4>
-{
-    typedef uint32_t type;
+template <>
+struct UnsignedForSize<4> {
+  typedef uint32_t type;
 };
-template<>
-struct UnsignedForSize<8>
-{
-    typedef uint64_t type;
+template <>
+struct UnsignedForSize<8> {
+  typedef uint64_t type;
 };
-template<typename T>
+template <typename T>
 struct SubKey;
-template<size_t Size>
-struct SizedSubKey
-{
-    template<typename T>
-    static auto sub_key(T && value, void *)
-    {
-        return to_unsigned_or_bool(value);
-    }
-
-    typedef SubKey<void> next;
-
-    using sub_key_type = typename UnsignedForSize<Size>::type;
-};
-template<typename T>
-struct SubKey<const T> : SubKey<T>
-{
-};
-template<typename T>
-struct SubKey<T &> : SubKey<T>
-{
-};
-template<typename T>
-struct SubKey<T &&> : SubKey<T>
-{
-};
-template<typename T>
-struct SubKey<const T &> : SubKey<T>
-{
-};
-template<typename T>
-struct SubKey<const T &&> : SubKey<T>
-{
-};
-template<typename T, typename Enable = void>
-struct FallbackSubKey
-    : SubKey<decltype(to_radix_sort_key(std::declval<T>()))>
-{
-    using base = SubKey<decltype(to_radix_sort_key(std::declval<T>()))>;
-
-    template<typename U>
-    static decltype(auto) sub_key(U && value, void * data)
-    {
-        return base::sub_key(to_radix_sort_key(value), data);
-    }
-};
-template<typename T>
+template <size_t Size>
+struct SizedSubKey {
+  template <typename T>
+  static auto sub_key(T&& value, void*) {
+    return to_unsigned_or_bool(value);
+  }
+
+  typedef SubKey<void> next;
+
+  using sub_key_type = typename UnsignedForSize<Size>::type;
+};
+template <typename T>
+struct SubKey<const T> : SubKey<T> {};
+template <typename T>
+struct SubKey<T&> : SubKey<T> {};
+template <typename T>
+struct SubKey<T&&> : SubKey<T> {};
+template <typename T>
+struct SubKey<const T&> : SubKey<T> {};
+template <typename T>
+struct SubKey<const T&&> : SubKey<T> {};
+template <typename T, typename Enable = void>
+struct FallbackSubKey : SubKey<decltype(to_radix_sort_key(std::declval<T>()))> {
+  using base = SubKey<decltype(to_radix_sort_key(std::declval<T>()))>;
+
+  template <typename U>
+  static decltype(auto) sub_key(U&& value, void* data) {
+    return base::sub_key(to_radix_sort_key(value), data);
+  }
+};
+template <typename T>
 struct FallbackSubKey<T, void_t<decltype(to_unsigned_or_bool(std::declval<T>()))>>
-    : SubKey<decltype(to_unsigned_or_bool(std::declval<T>()))>
-{
-};
-template<typename T>
-struct SubKey : FallbackSubKey<T>
-{
-};
-template<>
-struct SubKey<bool>
-{
-    template<typename T>
-    static bool sub_key(T && value, void *)
-    {
-        return value;
-    }
+    : SubKey<decltype(to_unsigned_or_bool(std::declval<T>()))> {};
+template <typename T>
+struct SubKey : FallbackSubKey<T> {};
+template <>
+struct SubKey<bool> {
+  template <typename T>
+  static bool sub_key(T&& value, void*) {
+    return value;
+  }
 
-    typedef SubKey<void> next;
+  typedef SubKey<void> next;
 
-    using sub_key_type = bool;
+  using sub_key_type = bool;
 };
-template<>
+template <>
 struct SubKey<void>;
-template<>
-struct SubKey<unsigned char> : SizedSubKey<sizeof(unsigned char)>
-{
-};
-template<>
-struct SubKey<unsigned short> : SizedSubKey<sizeof(unsigned short)>
-{
-};
-template<>
-struct SubKey<unsigned int> : SizedSubKey<sizeof(unsigned int)>
-{
-};
-template<>
-struct SubKey<unsigned long> : SizedSubKey<sizeof(unsigned long)>
-{
-};
-template<>
-struct SubKey<unsigned long long> : SizedSubKey<sizeof(unsigned long long)>
-{
-};
-template<typename T>
-struct SubKey<T *> : SizedSubKey<sizeof(T *)>
-{
-};
-template<typename F, typename S, typename Current>
-struct PairSecondSubKey : Current
-{
-    static decltype(auto) sub_key(const std::pair<F, S> & value, void * sort_data)
-    {
-        return Current::sub_key(value.second, sort_data);
-    }
-
-    using next = typename std::conditional<std::is_same<SubKey<void>, typename Current::next>::value, SubKey<void>, PairSecondSubKey<F, S, typename Current::next>>::type;
-};
-template<typename F, typename S, typename Current>
-struct PairFirstSubKey : Current
-{
-    static decltype(auto) sub_key(const std::pair<F, S> & value, void * sort_data)
-    {
-        return Current::sub_key(value.first, sort_data);
-    }
-
-    using next = typename std::conditional<std::is_same<SubKey<void>, typename Current::next>::value, PairSecondSubKey<F, S, SubKey<S>>, PairFirstSubKey<F, S, typename Current::next>>::type;
-};
-template<typename F, typename S>
-struct SubKey<std::pair<F, S>> : PairFirstSubKey<F, S, SubKey<F>>
-{
-};
-template<size_t Index, typename First, typename... More>
-struct TypeAt : TypeAt<Index - 1, More..., void>
-{
-};
-template<typename First, typename... More>
-struct TypeAt<0, First, More...>
-{
-    typedef First type;
-};
-
-template<size_t Index, typename Current, typename First, typename... More>
+template <>  // unsigned integer and pointer keys: SizedSubKey picked by sizeof
+struct SubKey<unsigned char> : SizedSubKey<sizeof(unsigned char)> {};
+template <>
+struct SubKey<unsigned short> : SizedSubKey<sizeof(unsigned short)> {};
+template <>
+struct SubKey<unsigned int> : SizedSubKey<sizeof(unsigned int)> {};
+template <>
+struct SubKey<unsigned long> : SizedSubKey<sizeof(unsigned long)> {};
+template <>
+struct SubKey<unsigned long long> : SizedSubKey<sizeof(unsigned long long)> {};
+template <typename T>
+struct SubKey<T*> : SizedSubKey<sizeof(T*)> {};
+template <typename F, typename S, typename Current>
+struct PairSecondSubKey : Current {  // applies Current to the pair's second member
+  static decltype(auto) sub_key(const std::pair<F, S>& value, void* sort_data) {
+    return Current::sub_key(value.second, sort_data);
+  }
+
+  using next =  // SubKey<void> when Current has no next, else wrap Current::next
+      typename std::conditional<std::is_same<SubKey<void>, typename Current::next>::value,
+                                SubKey<void>,
+                                PairSecondSubKey<F, S, typename Current::next>>::type;
+};
+template <typename F, typename S, typename Current>
+struct PairFirstSubKey : Current {  // applies Current to the pair's first member
+  static decltype(auto) sub_key(const std::pair<F, S>& value, void* sort_data) {
+    return Current::sub_key(value.first, sort_data);
+  }
+
+  using next =  // once first is exhausted, switch to SubKey<S> on second
+      typename std::conditional<std::is_same<SubKey<void>, typename Current::next>::value,
+                                PairSecondSubKey<F, S, SubKey<S>>,
+                                PairFirstSubKey<F, S, typename Current::next>>::type;
+};
+template <typename F, typename S>  // pair keys: first member, then second member
+struct SubKey<std::pair<F, S>> : PairFirstSubKey<F, S, SubKey<F>> {};
+template <size_t Index, typename First, typename... More>
+struct TypeAt : TypeAt<Index - 1, More..., void> {};  // drop First, append void
+template <typename First, typename... More>
+struct TypeAt<0, First, More...> {
+  typedef First type;  // Index reached 0: First is the selected type
+};
+
+template <size_t Index, typename Current, typename First, typename... More>
 struct TupleSubKey;
 
-template<size_t Index, typename Next, typename First, typename... More>
-struct NextTupleSubKey
-{
-    using type = TupleSubKey<Index, Next, First, More...>;
+template <size_t Index, typename Next, typename First, typename... More>
+struct NextTupleSubKey {  // general case: keep Index and continue with Next
+  using type = TupleSubKey<Index, Next, First, More...>;
 };
-template<size_t Index, typename First, typename Second, typename... More>
-struct NextTupleSubKey<Index, SubKey<void>, First, Second, More...>
-{
-    using type = TupleSubKey<Index + 1, SubKey<Second>, Second, More...>;
+template <size_t Index, typename First, typename Second, typename... More>
+struct NextTupleSubKey<Index, SubKey<void>, First, Second, More...> {  // go to Second
+  using type = TupleSubKey<Index + 1, SubKey<Second>, Second, More...>;
 };
-template<size_t Index, typename First>
-struct NextTupleSubKey<Index, SubKey<void>, First>
-{
-    using type = SubKey<void>;
+template <size_t Index, typename First>
+struct NextTupleSubKey<Index, SubKey<void>, First> {  // no element left: chain ends
+  using type = SubKey<void>;
 };
 
-template<size_t Index, typename Current, typename First, typename... More>
-struct TupleSubKey : Current
-{
-    template<typename Tuple>
-    static decltype(auto) sub_key(const Tuple & value, void * sort_data)
-    {
-        return Current::sub_key(std::get<Index>(value), sort_data);
-    }
+template <size_t Index, typename Current, typename First, typename... More>
+struct TupleSubKey : Current {  // applies Current to tuple element Index
+  template <typename Tuple>
+  static decltype(auto) sub_key(const Tuple& value, void* sort_data) {
+    return Current::sub_key(std::get<Index>(value), sort_data);
+  }
 
-    using next = typename NextTupleSubKey<Index, typename Current::next, First, More...>::type;
+  using next =  // NextTupleSubKey decides whether to stay on Index or advance
+      typename NextTupleSubKey<Index, typename Current::next, First, More...>::type;
 };
-template<size_t Index, typename Current, typename First>
-struct TupleSubKey<Index, Current, First> : Current
-{
-    template<typename Tuple>
-    static decltype(auto) sub_key(const Tuple & value, void * sort_data)
-    {
-        return Current::sub_key(std::get<Index>(value), sort_data);
-    }
+template <size_t Index, typename Current, typename First>
+struct TupleSubKey<Index, Current, First> : Current {  // empty More... pack
+  template <typename Tuple>
+  static decltype(auto) sub_key(const Tuple& value, void* sort_data) {
+    return Current::sub_key(std::get<Index>(value), sort_data);
+  }
 
-    using next = typename NextTupleSubKey<Index, typename Current::next, First>::type;
-};
-template<typename First, typename... More>
-struct SubKey<std::tuple<First, More...>> : TupleSubKey<0, SubKey<First>, First, More...>
-{
+  using next = typename NextTupleSubKey<Index, typename Current::next, First>::type;
 };
+template <typename First, typename... More>
+struct SubKey<std::tuple<First, More...>>  // tuple keys start at element 0
+    : TupleSubKey<0, SubKey<First>, First, More...> {};
 
-struct BaseListSortData
-{
-    size_t current_index;
-    size_t recursion_limit;
-    void * next_sort_data;
+struct BaseListSortData {
+  size_t current_index;  // element position read by ListElementSubKey::sub_key
+  size_t recursion_limit;  // decremented by ListInplaceSorter::sort_from_recursion
+  void* next_sort_data;  // forwarded to CurrentSubKey::sub_key and to next_sort
 };
-template<typename It, typename ExtractKey>
-struct ListSortData : BaseListSortData
-{
-    void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey &, void *);
+template <typename It, typename ExtractKey>
+struct ListSortData : BaseListSortData {
+  void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey&, void*);  // may be null
 };
 
-template<typename CurrentSubKey, typename T>
-struct ListElementSubKey : SubKey<typename std::decay<decltype(std::declval<T>()[0])>::type>
-{
-    using base = SubKey<typename std::decay<decltype(std::declval<T>()[0])>::type>;
+template <typename CurrentSubKey, typename T>
+struct ListElementSubKey  // sub-key for one element of a list-like key T
+    : SubKey<typename std::decay<decltype(std::declval<T>()[0])>::type> {
+  using base = SubKey<typename std::decay<decltype(std::declval<T>()[0])>::type>;
 
-    using next = ListElementSubKey;
+  using next = ListElementSubKey;  // same type; position comes from sort_data
 
-    template<typename U>
-    static decltype(auto) sub_key(U && value, void * sort_data)
-    {
-        BaseListSortData * list_sort_data = static_cast<BaseListSortData *>(sort_data);
-        const T & list = CurrentSubKey::sub_key(value, list_sort_data->next_sort_data);
-        return base::sub_key(list[list_sort_data->current_index], list_sort_data->next_sort_data);
-    }
+  template <typename U>  // sort_data is cast to BaseListSortData*
+  static decltype(auto) sub_key(U&& value, void* sort_data) {
+    BaseListSortData* list_sort_data = static_cast<BaseListSortData*>(sort_data);
+    const T& list = CurrentSubKey::sub_key(value, list_sort_data->next_sort_data);
+    return base::sub_key(list[list_sort_data->current_index],
+                         list_sort_data->next_sort_data);
+  }
 };
 
-template<typename T>
-struct ListSubKey
-{
-    using next = SubKey<void>;
+template <typename T>
+struct ListSubKey {  // the whole list value is the sub-key
+  using next = SubKey<void>;
 
-    using sub_key_type = T;
+  using sub_key_type = T;
 
-    static const T & sub_key(const T & value, void *)
-    {
-        return value;
-    }
+  static const T& sub_key(const T& value, void*) { return value; }  // identity
 };
 
-template<typename T>
-struct FallbackSubKey<T, typename std::enable_if<has_subscript_operator<T>::value>::type> : ListSubKey<T>
-{
-};
+template <typename T>  // selected when has_subscript_operator<T>::value is true
+struct FallbackSubKey<T, typename std::enable_if<has_subscript_operator<T>::value>::type>
+    : ListSubKey<T> {};
 
-template<typename It, typename ExtractKey>
-inline void StdSortFallback(It begin, It end, ExtractKey & extract_key)
-{
-    std::sort(begin, end, [&](auto && l, auto && r){ return extract_key(l) < extract_key(r); });
+template <typename It, typename ExtractKey>
+inline void StdSortFallback(It begin, It end, ExtractKey& extract_key) {
+  std::sort(begin, end,  // compares extracted keys with operator<
+            [&](auto&& l, auto&& r) { return extract_key(l) < extract_key(r); });
 }
 
-template<std::ptrdiff_t StdSortThreshold, typename It, typename ExtractKey>
-inline bool StdSortIfLessThanThreshold(It begin, It end, std::ptrdiff_t num_elements, ExtractKey & extract_key)
-{
-    if (num_elements <= 1)
-        return true;
-    if (num_elements >= StdSortThreshold)
-        return false;
-    StdSortFallback(begin, end, extract_key);
-    return true;
+template <std::ptrdiff_t StdSortThreshold, typename It, typename ExtractKey>
+inline bool StdSortIfLessThanThreshold(It begin, It end, std::ptrdiff_t num_elements,
+                                       ExtractKey& extract_key) {
+  if (num_elements <= 1) return true;  // nothing to sort
+  if (num_elements >= StdSortThreshold) return false;  // too large: caller sorts
+  StdSortFallback(begin, end, extract_key);
+  return true;  // range was sorted here
 }
 
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey, typename SubKeyType = typename CurrentSubKey::sub_key_type>
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey,
+          typename SubKeyType = typename CurrentSubKey::sub_key_type>
 struct InplaceSorter;
 
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey, size_t NumBytes, size_t Offset = 0>
-struct UnsignedInplaceSorter
-{
-    static constexpr size_t ShiftAmount = (((NumBytes - 1) - Offset) * 8);
-    template<typename T>
-    inline static uint8_t current_byte(T && elem, void * sort_data)
-    {
-        return CurrentSubKey::sub_key(elem, sort_data) >> ShiftAmount;
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey, size_t NumBytes, size_t Offset = 0>
+struct UnsignedInplaceSorter {  // Offset 0 selects the most significant byte
+  static constexpr size_t ShiftAmount = (((NumBytes - 1) - Offset) * 8);
+  template <typename T>
+  inline static uint8_t current_byte(T&& elem, void* sort_data) {
+    return CurrentSubKey::sub_key(elem, sort_data) >> ShiftAmount;  // keeps low 8 bits
+  }
+  template <typename It, typename ExtractKey>
+  static void sort(It begin, It end, std::ptrdiff_t num_elements, ExtractKey& extract_key,
+                   void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey&, void*),
+                   void* sort_data) {
+    if (num_elements < AmericanFlagSortThreshold)  // pick algorithm by range size
+      american_flag_sort(begin, end, extract_key, next_sort, sort_data);
+    else
+      ska_byte_sort(begin, end, extract_key, next_sort, sort_data);
+  }
+
+  template <typename It, typename ExtractKey>
+  static void american_flag_sort(It begin, It end, ExtractKey& extract_key,
+                                 void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey&,
+                                                   void*),
+                                 void* sort_data) {
+    PartitionInfo partitions[256];
+    for (It it = begin; it != end; ++it) {  // count keys per byte value
+      ++partitions[current_byte(extract_key(*it), sort_data)].count;
     }
-    template<typename It, typename ExtractKey>
-    static void sort(It begin, It end, std::ptrdiff_t num_elements, ExtractKey & extract_key, void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey &, void *), void * sort_data)
-    {
-        if (num_elements < AmericanFlagSortThreshold)
-            american_flag_sort(begin, end, extract_key, next_sort, sort_data);
-        else
-            ska_byte_sort(begin, end, extract_key, next_sort, sort_data);
+    size_t total = 0;
+    uint8_t remaining_partitions[256];
+    int num_partitions = 0;
+    for (int i = 0; i < 256; ++i) {  // assign offsets; list non-empty buckets
+      size_t count = partitions[i].count;
+      if (!count) continue;
+      partitions[i].offset = total;
+      total += count;
+      partitions[i].next_offset = total;
+      remaining_partitions[num_partitions] = i;
+      ++num_partitions;
     }
-
-    template<typename It, typename ExtractKey>
-    static void american_flag_sort(It begin, It end, ExtractKey & extract_key, void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey &, void *), void * sort_data)
-    {
-        PartitionInfo partitions[256];
-        for (It it = begin; it != end; ++it)
-        {
-            ++partitions[current_byte(extract_key(*it), sort_data)].count;
-        }
-        size_t total = 0;
-        uint8_t remaining_partitions[256];
-        int num_partitions = 0;
-        for (int i = 0; i < 256; ++i)
-        {
-            size_t count = partitions[i].count;
-            if (!count)
-                continue;
-            partitions[i].offset = total;
-            total += count;
-            partitions[i].next_offset = total;
-            remaining_partitions[num_partitions] = i;
-            ++num_partitions;
-        }
-        if (num_partitions > 1)
-        {
-            uint8_t * current_block_ptr = remaining_partitions;
-            PartitionInfo * current_block = partitions + *current_block_ptr;
-            uint8_t * last_block = remaining_partitions + num_partitions - 1;
-            It it = begin;
-            It block_end = begin + current_block->next_offset;
-            It last_element = end - 1;
-            for (;;)
-            {
-                PartitionInfo * block = partitions + current_byte(extract_key(*it), sort_data);
-                if (block == current_block)
-                {
-                    ++it;
-                    if (it == last_element)
-                        break;
-                    else if (it == block_end)
-                    {
-                        for (;;)
-                        {
-                            ++current_block_ptr;
-                            if (current_block_ptr == last_block)
-                                goto recurse;
-                            current_block = partitions + *current_block_ptr;
-                            if (current_block->offset != current_block->next_offset)
-                                break;
-                        }
-
-                        it = begin + current_block->offset;
-                        block_end = begin + current_block->next_offset;
-                    }
-                }
-                else
-                {
-                    size_t offset = block->offset++;
-                    std::iter_swap(it, begin + offset);
-                }
-            }
-        }
-        recurse:
-        if (Offset + 1 != NumBytes || next_sort)
-        {
-            size_t start_offset = 0;
-            It partition_begin = begin;
-            for (uint8_t * it = remaining_partitions, * end = remaining_partitions + num_partitions; it != end; ++it)
-            {
-                size_t end_offset = partitions[*it].next_offset;
-                It partition_end = begin + end_offset;
-                std::ptrdiff_t num_elements = end_offset - start_offset;
-                if (!StdSortIfLessThanThreshold<StdSortThreshold>(partition_begin, partition_end, num_elements, extract_key))
-                {
-                    UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, NumBytes, Offset + 1>::sort(partition_begin, partition_end, num_elements, extract_key, next_sort, sort_data);
-                }
-                start_offset = end_offset;
-                partition_begin = partition_end;
+    if (num_partitions > 1) {  // a single bucket needs no element moves
+      uint8_t* current_block_ptr = remaining_partitions;
+      PartitionInfo* current_block = partitions + *current_block_ptr;
+      uint8_t* last_block = remaining_partitions + num_partitions - 1;
+      It it = begin;
+      It block_end = begin + current_block->next_offset;
+      It last_element = end - 1;
+      for (;;) {
+        PartitionInfo* block = partitions + current_byte(extract_key(*it), sort_data);
+        if (block == current_block) {
+          ++it;
+          if (it == last_element)
+            break;
+          else if (it == block_end) {
+            for (;;) {
+              ++current_block_ptr;
+              if (current_block_ptr == last_block) goto recurse;
+              current_block = partitions + *current_block_ptr;
+              if (current_block->offset != current_block->next_offset) break;
             }
-        }
-    }
 
-    template<typename It, typename ExtractKey>
-    static void ska_byte_sort(It begin, It end, ExtractKey & extract_key, void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey &, void *), void * sort_data)
-    {
-        PartitionInfo partitions[256];
-        for (It it = begin; it != end; ++it)
-        {
-            ++partitions[current_byte(extract_key(*it), sort_data)].count;
+            it = begin + current_block->offset;
+            block_end = begin + current_block->next_offset;
+          }
+        } else {
+          size_t offset = block->offset++;  // claim next slot in target bucket
+          std::iter_swap(it, begin + offset);
         }
-        uint8_t remaining_partitions[256];
-        size_t total = 0;
-        int num_partitions = 0;
-        for (int i = 0; i < 256; ++i)
-        {
-            size_t count = partitions[i].count;
-            if (count)
-            {
-                partitions[i].offset = total;
-                total += count;
-                remaining_partitions[num_partitions] = i;
-                ++num_partitions;
-            }
-            partitions[i].next_offset = total;
+      }
+    }
+  recurse:  // sort inside each bucket
+    if (Offset + 1 != NumBytes || next_sort) {  // skip if nothing left to sort by
+      size_t start_offset = 0;
+      It partition_begin = begin;
+      for (uint8_t *it = remaining_partitions,
+                   *end = remaining_partitions + num_partitions;
+           it != end; ++it) {
+        size_t end_offset = partitions[*it].next_offset;
+        It partition_end = begin + end_offset;
+        std::ptrdiff_t num_elements = end_offset - start_offset;
+        if (!StdSortIfLessThanThreshold<StdSortThreshold>(partition_begin, partition_end,
+                                                          num_elements, extract_key)) {
+          UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold,
+                                CurrentSubKey, NumBytes,
+                                Offset + 1>::sort(partition_begin, partition_end,
+                                                  num_elements, extract_key, next_sort,
+                                                  sort_data);
         }
-        for (uint8_t * last_remaining = remaining_partitions + num_partitions, * end_partition = remaining_partitions + 1; last_remaining > end_partition;)
-        {
-            last_remaining = custom_std_partition(remaining_partitions, last_remaining, [&](uint8_t partition)
-            {
-                size_t & begin_offset = partitions[partition].offset;
-                size_t & end_offset = partitions[partition].next_offset;
-                if (begin_offset == end_offset)
-                    return false;
-
-                unroll_loop_four_times(begin + begin_offset, end_offset - begin_offset, [partitions = partitions, begin, &extract_key, sort_data](It it)
-                {
-                    uint8_t this_partition = current_byte(extract_key(*it), sort_data);
-                    size_t offset = partitions[this_partition].offset++;
-                    std::iter_swap(it, begin + offset);
+        start_offset = end_offset;
+        partition_begin = partition_end;
+      }
+    }
+  }
+
+  template <typename It, typename ExtractKey>
+  static void ska_byte_sort(It begin, It end, ExtractKey& extract_key,
+                            void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey&, void*),
+                            void* sort_data) {
+    PartitionInfo partitions[256];
+    for (It it = begin; it != end; ++it) {  // count keys per byte value
+      ++partitions[current_byte(extract_key(*it), sort_data)].count;
+    }
+    uint8_t remaining_partitions[256];
+    size_t total = 0;
+    int num_partitions = 0;
+    for (int i = 0; i < 256; ++i) {
+      size_t count = partitions[i].count;
+      if (count) {
+        partitions[i].offset = total;
+        total += count;
+        remaining_partitions[num_partitions] = i;
+        ++num_partitions;
+      }
+      partitions[i].next_offset = total;  // written for empty buckets as well
+    }
+    for (uint8_t *last_remaining = remaining_partitions + num_partitions,
+                 *end_partition = remaining_partitions + 1;
+         last_remaining > end_partition;) {
+      last_remaining = custom_std_partition(
+          remaining_partitions, last_remaining, [&](uint8_t partition) {
+            size_t& begin_offset = partitions[partition].offset;
+            size_t& end_offset = partitions[partition].next_offset;
+            if (begin_offset == end_offset) return false;  // bucket is complete
+
+            unroll_loop_four_times(
+                begin + begin_offset, end_offset - begin_offset,
+                [partitions = partitions, begin, &extract_key, sort_data](It it) {
+                  uint8_t this_partition = current_byte(extract_key(*it), sort_data);
+                  size_t offset = partitions[this_partition].offset++;
+                  std::iter_swap(it, begin + offset);
                 });
-                return begin_offset != end_offset;
-            });
-        }
-        if (Offset + 1 != NumBytes || next_sort)
-        {
-            for (uint8_t * it = remaining_partitions + num_partitions; it != remaining_partitions; --it)
-            {
-                uint8_t partition = it[-1];
-                size_t start_offset = (partition == 0 ? 0 : partitions[partition - 1].next_offset);
-                size_t end_offset = partitions[partition].next_offset;
-                It partition_begin = begin + start_offset;
-                It partition_end = begin + end_offset;
-                std::ptrdiff_t num_elements = end_offset - start_offset;
-                if (!StdSortIfLessThanThreshold<StdSortThreshold>(partition_begin, partition_end, num_elements, extract_key))
-                {
-                    UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, NumBytes, Offset + 1>::sort(partition_begin, partition_end, num_elements, extract_key, next_sort, sort_data);
-                }
-            }
+            return begin_offset != end_offset;
+          });
+    }
+    if (Offset + 1 != NumBytes || next_sort) {  // skip if nothing left to sort by
+      for (uint8_t* it = remaining_partitions + num_partitions;
+           it != remaining_partitions; --it) {
+        uint8_t partition = it[-1];  // iterate remaining_partitions back to front
+        size_t start_offset =
+            (partition == 0 ? 0 : partitions[partition - 1].next_offset);
+        size_t end_offset = partitions[partition].next_offset;
+        It partition_begin = begin + start_offset;
+        It partition_end = begin + end_offset;
+        std::ptrdiff_t num_elements = end_offset - start_offset;
+        if (!StdSortIfLessThanThreshold<StdSortThreshold>(partition_begin, partition_end,
+                                                          num_elements, extract_key)) {
+          UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold,
+                                CurrentSubKey, NumBytes,
+                                Offset + 1>::sort(partition_begin, partition_end,
+                                                  num_elements, extract_key, next_sort,
+                                                  sort_data);
         }
+      }
     }
-};
-
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey, size_t NumBytes>
-struct UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, NumBytes, NumBytes>
-{
-    template<typename It, typename ExtractKey>
-    inline static void sort(It begin, It end, std::ptrdiff_t num_elements, ExtractKey & extract_key, void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey &, void *), void * next_sort_data)
-    {
-        next_sort(begin, end, num_elements, extract_key, next_sort_data);
+  }
+};
+
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey, size_t NumBytes>
+struct UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey,
+                             NumBytes, NumBytes> {  // Offset == NumBytes case
+  template <typename It, typename ExtractKey>
+  inline static void sort(It begin, It end, std::ptrdiff_t num_elements,
+                          ExtractKey& extract_key,
+                          void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey&, void*),
+                          void* next_sort_data) {
+    next_sort(begin, end, num_elements, extract_key, next_sort_data);  // unchecked call
+  }
+};
+
+template <typename It, typename ExtractKey, typename ElementKey>
+size_t CommonPrefix(It begin, It end, size_t start_index, ExtractKey&& extract_key,
+                    ElementKey&& element_key) {  // *begin is read unconditionally
+  const auto& largest_match_list = extract_key(*begin);
+  size_t largest_match = largest_match_list.size();  // upper bound on the result
+  if (largest_match == start_index) return start_index;
+  for (++begin; begin != end; ++begin) {
+    const auto& current_list = extract_key(*begin);
+    size_t current_size = current_list.size();
+    if (current_size < largest_match) {
+      largest_match = current_size;
+      if (largest_match == start_index) return start_index;
     }
-};
-
-template<typename It, typename ExtractKey, typename ElementKey>
-size_t CommonPrefix(It begin, It end, size_t start_index, ExtractKey && extract_key, ElementKey && element_key)
-{
-    const auto & largest_match_list = extract_key(*begin);
-    size_t largest_match = largest_match_list.size();
-    if (largest_match == start_index)
-        return start_index;
-    for (++begin; begin != end; ++begin)
-    {
-        const auto & current_list = extract_key(*begin);
-        size_t current_size = current_list.size();
-        if (current_size < largest_match)
-        {
-            largest_match = current_size;
-            if (largest_match == start_index)
-                return start_index;
-        }
-        if (element_key(largest_match_list[start_index]) != element_key(current_list[start_index]))
-            return start_index;
-        for (size_t i = start_index + 1; i < largest_match; ++i)
-        {
-            if (element_key(largest_match_list[i]) != element_key(current_list[i]))
-            {
-                largest_match = i;
-                break;
-            }
-        }
+    if (element_key(largest_match_list[start_index]) !=
+        element_key(current_list[start_index]))
+      return start_index;  // mismatch at start_index: prefix cannot grow
+    for (size_t i = start_index + 1; i < largest_match; ++i) {
+      if (element_key(largest_match_list[i]) != element_key(current_list[i])) {
+        largest_match = i;  // prefix ends at the first differing element
+        break;
+      }
     }
-    return largest_match;
+  }
+  return largest_match;
 }
 
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey, typename ListType>
-struct ListInplaceSorter
-{
-    using ElementSubKey = ListElementSubKey<CurrentSubKey, ListType>;
-    template<typename It, typename ExtractKey>
-    static void sort(It begin, It end, ExtractKey & extract_key, ListSortData<It, ExtractKey> * sort_data)
-    {
-        size_t current_index = sort_data->current_index;
-        void * next_sort_data = sort_data->next_sort_data;
-        auto current_key = [&](auto && elem) -> decltype(auto)
-        {
-            return CurrentSubKey::sub_key(extract_key(elem), next_sort_data);
-        };
-        auto element_key = [&](auto && elem) -> decltype(auto)
-        {
-            return ElementSubKey::base::sub_key(elem, sort_data);
-        };
-        sort_data->current_index = current_index = CommonPrefix(begin, end, current_index, current_key, element_key);
-        It end_of_shorter_ones = std::partition(begin, end, [&](auto && elem)
-        {
-            return current_key(elem).size() <= current_index;
-        });
-        std::ptrdiff_t num_shorter_ones = end_of_shorter_ones - begin;
-        if (sort_data->next_sort && !StdSortIfLessThanThreshold<StdSortThreshold>(begin, end_of_shorter_ones, num_shorter_ones, extract_key))
-        {
-            sort_data->next_sort(begin, end_of_shorter_ones, num_shorter_ones, extract_key, next_sort_data);
-        }
-        std::ptrdiff_t num_elements = end - end_of_shorter_ones;
-        if (!StdSortIfLessThanThreshold<StdSortThreshold>(end_of_shorter_ones, end, num_elements, extract_key))
-        {
-            void (*sort_next_element)(It, It, std::ptrdiff_t, ExtractKey &, void *) = static_cast<void (*)(It, It, std::ptrdiff_t, ExtractKey &, void *)>(&sort_from_recursion);
-            InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, ElementSubKey>::sort(end_of_shorter_ones, end, num_elements, extract_key, sort_next_element, sort_data);
-        }
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey, typename ListType>
+struct ListInplaceSorter {
+  using ElementSubKey = ListElementSubKey<CurrentSubKey, ListType>;
+  template <typename It, typename ExtractKey>
+  static void sort(It begin, It end, ExtractKey& extract_key,
+                   ListSortData<It, ExtractKey>* sort_data) {
+    size_t current_index = sort_data->current_index;
+    void* next_sort_data = sort_data->next_sort_data;
+    auto current_key = [&](auto&& elem) -> decltype(auto) {
+      return CurrentSubKey::sub_key(extract_key(elem), next_sort_data);
+    };
+    auto element_key = [&](auto&& elem) -> decltype(auto) {
+      return ElementSubKey::base::sub_key(elem, sort_data);
+    };
+    sort_data->current_index = current_index =
+        CommonPrefix(begin, end, current_index, current_key, element_key);
+    It end_of_shorter_ones = std::partition(begin, end, [&](auto&& elem) {
+      return current_key(elem).size() <= current_index;
+    });
+    std::ptrdiff_t num_shorter_ones = end_of_shorter_ones - begin;
+    if (sort_data->next_sort &&
+        !StdSortIfLessThanThreshold<StdSortThreshold>(begin, end_of_shorter_ones,
+                                                      num_shorter_ones, extract_key)) {
+      sort_data->next_sort(begin, end_of_shorter_ones, num_shorter_ones, extract_key,
+                           next_sort_data);
     }
-
-    template<typename It, typename ExtractKey>
-    static void sort_from_recursion(It begin, It end, std::ptrdiff_t, ExtractKey & extract_key, void * next_sort_data)
-    {
-        ListSortData<It, ExtractKey> offset = *static_cast<ListSortData<It, ExtractKey> *>(next_sort_data);
-        ++offset.current_index;
-        --offset.recursion_limit;
-        if (offset.recursion_limit == 0)
-        {
-            StdSortFallback(begin, end, extract_key);
-        }
-        else
-        {
-            sort(begin, end, extract_key, &offset);
-        }
+    std::ptrdiff_t num_elements = end - end_of_shorter_ones;
+    if (!StdSortIfLessThanThreshold<StdSortThreshold>(end_of_shorter_ones, end,
+                                                      num_elements, extract_key)) {
+      void (*sort_next_element)(It, It, std::ptrdiff_t, ExtractKey&, void*) =
+          static_cast<void (*)(It, It, std::ptrdiff_t, ExtractKey&, void*)>(
+              &sort_from_recursion);
+      InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, ElementSubKey>::sort(
+          end_of_shorter_ones, end, num_elements, extract_key, sort_next_element,
+          sort_data);
     }
-
-
-    template<typename It, typename ExtractKey>
-    static void sort(It begin, It end, std::ptrdiff_t, ExtractKey & extract_key, void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey &, void *), void * next_sort_data)
-    {
-        ListSortData<It, ExtractKey> offset;
-        offset.current_index = 0;
-        offset.recursion_limit = 16;
-        offset.next_sort = next_sort;
-        offset.next_sort_data = next_sort_data;
-        sort(begin, end, extract_key, &offset);
+  }
+
+  template <typename It, typename ExtractKey>
+  static void sort_from_recursion(It begin, It end, std::ptrdiff_t,
+                                  ExtractKey& extract_key, void* next_sort_data) {
+    ListSortData<It, ExtractKey> offset =
+        *static_cast<ListSortData<It, ExtractKey>*>(next_sort_data);
+    ++offset.current_index;
+    --offset.recursion_limit;
+    if (offset.recursion_limit == 0) {
+      StdSortFallback(begin, end, extract_key);
+    } else {
+      sort(begin, end, extract_key, &offset);
     }
-};
-
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey>
-struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, bool>
-{
-    template<typename It, typename ExtractKey>
-    static void sort(It begin, It end, std::ptrdiff_t, ExtractKey & extract_key, void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey &, void *), void * sort_data)
-    {
-        It middle = std::partition(begin, end, [&](auto && a){ return !CurrentSubKey::sub_key(extract_key(a), sort_data); });
-        if (next_sort)
-        {
-            next_sort(begin, middle, middle - begin, extract_key, sort_data);
-            next_sort(middle, end, end - middle, extract_key, sort_data);
-        }
+  }
+
+  template <typename It, typename ExtractKey>
+  static void sort(It begin, It end, std::ptrdiff_t, ExtractKey& extract_key,
+                   void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey&, void*),
+                   void* next_sort_data) {
+    ListSortData<It, ExtractKey> offset;
+    offset.current_index = 0;
+    offset.recursion_limit = 16;
+    offset.next_sort = next_sort;
+    offset.next_sort_data = next_sort_data;
+    sort(begin, end, extract_key, &offset);
+  }
+};
+
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey>
+struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, bool> {
+  template <typename It, typename ExtractKey>
+  static void sort(It begin, It end, std::ptrdiff_t, ExtractKey& extract_key,
+                   void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey&, void*),
+                   void* sort_data) {
+    It middle = std::partition(begin, end, [&](auto&& a) {
+      return !CurrentSubKey::sub_key(extract_key(a), sort_data);
+    });
+    if (next_sort) {
+      next_sort(begin, middle, middle - begin, extract_key, sort_data);
+      next_sort(middle, end, end - middle, extract_key, sort_data);
     }
-};
-
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey>
-struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, uint8_t> : UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, 1>
-{
-};
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey>
-struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, uint16_t> : UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, 2>
-{
-};
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey>
-struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, uint32_t> : UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, 4>
-{
-};
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey>
-struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, uint64_t> : UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, 8>
-{
-};
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey, typename SubKeyType, typename Enable = void>
+  }
+};
+
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey>
+struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, uint8_t>
+    : UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey,
+                            1> {};
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey>
+struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, uint16_t>
+    : UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey,
+                            2> {};
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey>
+struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, uint32_t>
+    : UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey,
+                            4> {};
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey>
+struct InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, uint64_t>
+    : UnsignedInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey,
+                            8> {};
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey, typename SubKeyType, typename Enable = void>
 struct FallbackInplaceSorter;
 
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey, typename SubKeyType>
-struct InplaceSorter : FallbackInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, SubKeyType>
-{
-};
-
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey, typename SubKeyType>
-struct FallbackInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, SubKeyType, typename std::enable_if<has_subscript_operator<SubKeyType>::value>::type>
-	: ListInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, SubKeyType>
-{
-};
-
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey>
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey, typename SubKeyType>
+struct InplaceSorter : FallbackInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold,
+                                             CurrentSubKey, SubKeyType> {};
+
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey, typename SubKeyType>
+struct FallbackInplaceSorter<
+    StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey, SubKeyType,
+    typename std::enable_if<has_subscript_operator<SubKeyType>::value>::type>
+    : ListInplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey,
+                        SubKeyType> {};
+
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey>
 struct SortStarter;
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold>
-struct SortStarter<StdSortThreshold, AmericanFlagSortThreshold, SubKey<void>>
-{
-    template<typename It, typename ExtractKey>
-    static void sort(It, It, std::ptrdiff_t, ExtractKey &, void *)
-    {
-    }
-};
-
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename CurrentSubKey>
-struct SortStarter
-{
-    template<typename It, typename ExtractKey>
-    static void sort(It begin, It end, std::ptrdiff_t num_elements, ExtractKey & extract_key, void * next_sort_data = nullptr)
-    {
-        if (StdSortIfLessThanThreshold<StdSortThreshold>(begin, end, num_elements, extract_key))
-            return;
-
-        void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey &, void *) = static_cast<void (*)(It, It, std::ptrdiff_t, ExtractKey &, void *)>(&SortStarter<StdSortThreshold, AmericanFlagSortThreshold, typename CurrentSubKey::next>::sort);
-        if (next_sort == static_cast<void (*)(It, It, std::ptrdiff_t, ExtractKey &, void *)>(&SortStarter<StdSortThreshold, AmericanFlagSortThreshold, SubKey<void>>::sort))
-            next_sort = nullptr;
-        InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey>::sort(begin, end, num_elements, extract_key, next_sort, next_sort_data);
-    }
-};
-
-template<std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold, typename It, typename ExtractKey>
-void inplace_radix_sort(It begin, It end, ExtractKey & extract_key)
-{
-    using SubKey = SubKey<decltype(extract_key(*begin))>;
-    SortStarter<StdSortThreshold, AmericanFlagSortThreshold, SubKey>::sort(begin, end, end - begin, extract_key);
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold>
+struct SortStarter<StdSortThreshold, AmericanFlagSortThreshold, SubKey<void>> {
+  template <typename It, typename ExtractKey>
+  static void sort(It, It, std::ptrdiff_t, ExtractKey&, void*) {}
+};
+
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename CurrentSubKey>
+struct SortStarter {
+  template <typename It, typename ExtractKey>
+  static void sort(It begin, It end, std::ptrdiff_t num_elements, ExtractKey& extract_key,
+                   void* next_sort_data = nullptr) {
+    if (StdSortIfLessThanThreshold<StdSortThreshold>(begin, end, num_elements,
+                                                     extract_key))
+      return;
+
+    void (*next_sort)(It, It, std::ptrdiff_t, ExtractKey&, void*) =
+        static_cast<void (*)(It, It, std::ptrdiff_t, ExtractKey&, void*)>(
+            &SortStarter<StdSortThreshold, AmericanFlagSortThreshold,
+                         typename CurrentSubKey::next>::sort);
+    if (next_sort == static_cast<void (*)(It, It, std::ptrdiff_t, ExtractKey&, void*)>(
+                         &SortStarter<StdSortThreshold, AmericanFlagSortThreshold,
+                                      SubKey<void>>::sort))
+      next_sort = nullptr;
+    InplaceSorter<StdSortThreshold, AmericanFlagSortThreshold, CurrentSubKey>::sort(
+        begin, end, num_elements, extract_key, next_sort, next_sort_data);
+  }
+};
+
+template <std::ptrdiff_t StdSortThreshold, std::ptrdiff_t AmericanFlagSortThreshold,
+          typename It, typename ExtractKey>
+void inplace_radix_sort(It begin, It end, ExtractKey& extract_key) {
+  using SubKey = SubKey<decltype(extract_key(*begin))>;
+  SortStarter<StdSortThreshold, AmericanFlagSortThreshold, SubKey>::sort(
+      begin, end, end - begin, extract_key);
 }
 
-struct IdentityFunctor
-{
-    template<typename T>
-    decltype(auto) operator()(T && i) const
-    {
-        return std::forward<T>(i);
-    }
+struct IdentityFunctor {
+  template <typename T>
+  decltype(auto) operator()(T&& i) const {
+    return std::forward<T>(i);
+  }
 };
-}
+}  // namespace detail
 
-template<typename It, typename ExtractKey>
-static void ska_sort(It begin, It end, ExtractKey && extract_key)
-{
-    detail::inplace_radix_sort<128, 1024>(begin, end, extract_key);
+template <typename It, typename ExtractKey>
+static void ska_sort(It begin, It end, ExtractKey&& extract_key) {
+  detail::inplace_radix_sort<128, 1024>(begin, end, extract_key);
 }
 
-template<typename It>
-static void ska_sort(It begin, It end)
-{
-    ska_sort(begin, end, detail::IdentityFunctor());
+template <typename It>
+static void ska_sort(It begin, It end) {
+  ska_sort(begin, end, detail::IdentityFunctor());
 }
 
-template<typename It, typename OutIt, typename ExtractKey>
-bool ska_sort_copy(It begin, It end, OutIt buffer_begin, ExtractKey && key)
-{
-    std::ptrdiff_t num_elements = end - begin;
-    if (num_elements < 128 || detail::radix_sort_pass_count<typename std::result_of<ExtractKey(decltype(*begin))>::type> >= 8)
-    {
-        ska_sort(begin, end, key);
-        return false;
-    }
-    else
-        return detail::RadixSorter<typename std::result_of<ExtractKey(decltype(*begin))>::type>::sort(begin, end, buffer_begin, key);
+template <typename It, typename OutIt, typename ExtractKey>
+bool ska_sort_copy(It begin, It end, OutIt buffer_begin, ExtractKey&& key) {
+  std::ptrdiff_t num_elements = end - begin;
+  if (num_elements < 128 ||
+      detail::radix_sort_pass_count<
+          typename std::result_of<ExtractKey(decltype(*begin))>::type> >= 8) {
+    ska_sort(begin, end, key);
+    return false;
+  } else
+    return detail::RadixSorter<
+        typename std::result_of<ExtractKey(decltype(*begin))>::type>::sort(begin, end,
+                                                                           buffer_begin,
+                                                                           key);
 }
-template<typename It, typename OutIt>
-bool ska_sort_copy(It begin, It end, OutIt buffer_begin)
-{
-    return ska_sort_copy(begin, end, buffer_begin, detail::IdentityFunctor());
+template <typename It, typename OutIt>
+bool ska_sort_copy(It begin, It end, OutIt buffer_begin) {
+  return ska_sort_copy(begin, end, buffer_begin, detail::IdentityFunctor());
 }
diff --git a/native-sql-engine/cpp/src/third_party/sparsehash/internal/densehashtable.h b/native-sql-engine/cpp/src/third_party/sparsehash/internal/densehashtable.h
index 4c48c2c10..06d7b4890 100644
--- a/native-sql-engine/cpp/src/third_party/sparsehash/internal/densehashtable.h
+++ b/native-sql-engine/cpp/src/third_party/sparsehash/internal/densehashtable.h
@@ -106,16 +106,17 @@
 #pragma once
 
 #include <assert.h>
-#include <stdio.h>    // for FILE, fwrite, fread
+#include <sparsehash/internal/hashtable-common.h>
+#include <sparsehash/internal/libc_allocator_with_realloc.h>
+#include <stdio.h>  // for FILE, fwrite, fread
+
 #include <algorithm>  // For swap(), eg
 #include <iterator>   // For iterator tags
 #include <limits>     // for numeric_limits
 #include <memory>     // For uninitialized_fill
-#include <utility>    // for pair
 #include <stdexcept>  // For length_error
 #include <type_traits>
-#include <sparsehash/internal/hashtable-common.h>
-#include <sparsehash/internal/libc_allocator_with_realloc.h>
+#include <utility>  // for pair
 
 namespace google {
 
@@ -156,13 +157,11 @@ struct dense_hashtable_const_iterator;
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 struct dense_hashtable_iterator {
  private:
-  using value_alloc_type =
-      typename std::allocator_traits<A>::template rebind_alloc<V>;
+  using value_alloc_type = typename std::allocator_traits<A>::template rebind_alloc<V>;
 
  public:
   typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
-  typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
-      const_iterator;
+  typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> const_iterator;
 
   typedef std::forward_iterator_tag iterator_category;  // very little defined!
   typedef V value_type;
@@ -172,9 +171,8 @@ struct dense_hashtable_iterator {
   typedef typename value_alloc_type::pointer pointer;
 
   // "Real" constructor and default constructor
-  dense_hashtable_iterator(
-      const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it,
-      pointer it_end, bool advance)
+  dense_hashtable_iterator(const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h,
+                           pointer it, pointer it_end, bool advance)
       : ht(h), pos(it), end(it_end) {
     if (advance) advance_past_empty_and_deleted();
   }
@@ -189,8 +187,7 @@ struct dense_hashtable_iterator {
   // Arithmetic.  The only hard part is making sure that
   // we're not on an empty or marked-deleted array element
   void advance_past_empty_and_deleted() {
-    while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)))
-      ++pos;
+    while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this))) ++pos;
   }
   iterator& operator++() {
     assert(pos != end);
@@ -217,13 +214,11 @@ struct dense_hashtable_iterator {
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 struct dense_hashtable_const_iterator {
  private:
-  using value_alloc_type =
-      typename std::allocator_traits<A>::template rebind_alloc<V>;
+  using value_alloc_type = typename std::allocator_traits<A>::template rebind_alloc<V>;
 
  public:
   typedef dense_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
-  typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
-      const_iterator;
+  typedef dense_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> const_iterator;
 
   typedef std::forward_iterator_tag iterator_category;  // very little defined!
   typedef V value_type;
@@ -233,9 +228,8 @@ struct dense_hashtable_const_iterator {
   typedef typename value_alloc_type::const_pointer pointer;
 
   // "Real" constructor and default constructor
-  dense_hashtable_const_iterator(
-      const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, pointer it,
-      pointer it_end, bool advance)
+  dense_hashtable_const_iterator(const dense_hashtable<V, K, HF, ExK, SetK, EqK, A>* h,
+                                 pointer it, pointer it_end, bool advance)
       : ht(h), pos(it), end(it_end) {
     if (advance) advance_past_empty_and_deleted();
   }
@@ -253,8 +247,7 @@ struct dense_hashtable_const_iterator {
   // Arithmetic.  The only hard part is making sure that
   // we're not on an empty or marked-deleted array element
   void advance_past_empty_and_deleted() {
-    while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this)))
-      ++pos;
+    while (pos != end && (ht->test_empty(*this) || ht->test_deleted(*this))) ++pos;
   }
   const_iterator& operator++() {
     assert(pos != end);
@@ -297,11 +290,13 @@ class dense_hashtable {
   typedef typename value_alloc_type::const_reference const_reference;
   typedef typename value_alloc_type::pointer pointer;
   typedef typename value_alloc_type::const_pointer const_pointer;
-  typedef dense_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
-                                   EqualKey, Alloc> iterator;
+  typedef dense_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey, EqualKey,
+                                   Alloc>
+      iterator;
 
-  typedef dense_hashtable_const_iterator<
-      Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator;
+  typedef dense_hashtable_const_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
+                                         EqualKey, Alloc>
+      const_iterator;
 
   // These come from tr1.  For us they're the same as regular iterators.
   typedef iterator local_iterator;
@@ -389,12 +384,12 @@ class dense_hashtable {
   // (NB: while you pass in an entire value, only the key part is looked
   // at.  This is just because I don't know how to assign just a key.)
  private:
-  void squash_deleted() {          // gets rid of any deleted entries we have
-    if (num_deleted) {             // get rid of deleted before writing
-      size_type resize_to = settings.min_buckets(
-          num_elements, bucket_count());
-      dense_hashtable tmp(std::move(*this), resize_to);  // copying will get rid of deleted
-      swap(tmp);                   // now we are tmp
+  void squash_deleted() {  // gets rid of any deleted entries we have
+    if (num_deleted) {     // get rid of deleted before writing
+      size_type resize_to = settings.min_buckets(num_elements, bucket_count());
+      dense_hashtable tmp(std::move(*this),
+                          resize_to);  // copying will get rid of deleted
+      swap(tmp);                       // now we are tmp
     }
     assert(num_deleted == 0);
   }
@@ -411,9 +406,8 @@ class dense_hashtable {
   void set_deleted_key(const key_type& key) {
     // the empty indicator (if specified) and the deleted indicator
     // must be different
-    assert(
-        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
-        "Passed the empty-key to set_deleted_key");
+    assert((!settings.use_empty() || !equals(key, key_info.empty_key)) &&
+           "Passed the empty-key to set_deleted_key");
     // It's only safe to change what "deleted" means if we purge deleted guys
     squash_deleted();
     settings.set_use_deleted(true);
@@ -424,8 +418,7 @@ class dense_hashtable {
     settings.set_use_deleted(false);
   }
   key_type deleted_key() const {
-    assert(settings.use_deleted() &&
-           "Must set deleted key before calling deleted_key");
+    assert(settings.use_deleted() && "Must set deleted key before calling deleted_key");
     return key_info.delkey;
   }
 
@@ -509,8 +502,7 @@ class dense_hashtable {
 
  private:
   void fill_range_with_empty(pointer table_start, size_type count) {
-    for (size_type i = 0; i < count; ++i)
-    {
+    for (size_type i = 0; i < count; ++i) {
       construct_key(&table_start[i], key_info.empty_key);
     }
   }
@@ -521,9 +513,8 @@ class dense_hashtable {
     assert(!settings.use_empty() && "Calling set_empty_key multiple times");
     // The deleted indicator (if specified) and the empty indicator
     // must be different.
-    assert(
-        (!settings.use_deleted() || !equals(key, key_info.delkey)) &&
-        "Setting the empty key the same as the deleted key");
+    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
+           "Setting the empty key the same as the deleted key");
     settings.set_use_empty(true);
     key_info.empty_key = key;
 
@@ -548,9 +539,7 @@ class dense_hashtable {
   size_type nonempty_bucket_count() const { return num_elements; }
   // These are tr1 methods.  Their idea of 'bucket' doesn't map well to
   // what we do.  We just say every bucket has 0 or 1 items in it.
-  size_type bucket_size(size_type i) const {
-    return begin(i) == end(i) ? 0 : 1;
-  }
+  size_type bucket_size(size_type i) const { return begin(i) == end(i) ? 0 : 1; }
 
  private:
   // Because of the above, size_type(-1) is never legal; use it for errors
@@ -576,12 +565,11 @@ class dense_hashtable {
         bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
       const float shrink_factor = settings.shrink_factor();
       size_type sz = bucket_count() / 2;  // find how much we should shrink
-      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
-             num_remain < sz * shrink_factor) {
+      while (sz > HT_DEFAULT_STARTING_BUCKETS && num_remain < sz * shrink_factor) {
         sz /= 2;  // stay a power of 2
       }
       dense_hashtable tmp(std::move(*this), sz);  // Do the actual resizing
-      swap(tmp);                       // now we are tmp
+      swap(tmp);                                  // now we are tmp
       retval = true;
     }
     settings.set_consider_shrink(false);  // because we just considered it
@@ -613,8 +601,8 @@ class dense_hashtable {
     if (needed_size <= bucket_count())  // we have enough buckets
       return did_resize;
 
-    size_type resize_to = settings.min_buckets(
-        num_elements - num_deleted + delta, bucket_count());
+    size_type resize_to =
+        settings.min_buckets(num_elements - num_deleted + delta, bucket_count());
 
     // When num_deleted is large, we may still grow but we do not want to
     // over expand.  So we reduce needed_size by a portion of num_deleted
@@ -645,8 +633,7 @@ class dense_hashtable {
   }
 
   // We require table be not-NULL and empty before calling this.
-  void resize_table(size_type /*old_size*/, size_type new_size,
-                    std::true_type) {
+  void resize_table(size_type /*old_size*/, size_type new_size, std::true_type) {
     table = val_info.realloc_or_die(table, new_size);
   }
 
@@ -670,15 +657,15 @@ class dense_hashtable {
       const size_type bucket_count_minus_one = bucket_count() - 1;
       for (bucknum = hash(get_key(value)) & bucket_count_minus_one;
            !test_empty(bucknum);  // not empty
-           bucknum =
-               (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
+           bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
         ++num_probes;
         assert(num_probes < bucket_count() &&
                "Hashtable is full: an error in key_equal<> or hash<>");
       }
 
       using will_move = std::is_rvalue_reference<Hashtable&&>;
-      using value_t = typename std::conditional<will_move::value, value_type&&, const_reference>::type;
+      using value_t = typename std::conditional<will_move::value, value_type&&,
+                                                const_reference>::type;
 
       set_value(&table[bucknum], std::forward<value_t>(value));
       num_elements++;
@@ -717,8 +704,7 @@ class dense_hashtable {
                            const HashFcn& hf = HashFcn(),
                            const EqualKey& eql = EqualKey(),
                            const ExtractKey& ext = ExtractKey(),
-                           const SetKey& set = SetKey(),
-                           const Alloc& alloc = Alloc())
+                           const SetKey& set = SetKey(), const Alloc& alloc = Alloc())
       : settings(hf),
         key_info(ext, set, eql),
         num_deleted(0),
@@ -752,16 +738,13 @@ class dense_hashtable {
       return;
     }
     settings.reset_thresholds(bucket_count());
-    copy_or_move_from(ht, min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
+    copy_or_move_from(ht,
+                      min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
   }
 
-  dense_hashtable(dense_hashtable&& ht)
-      : dense_hashtable() {
-    swap(ht);
-  }
+  dense_hashtable(dense_hashtable&& ht) : dense_hashtable() { swap(ht); }
 
-  dense_hashtable(dense_hashtable&& ht,
-                  size_type min_buckets_wanted)
+  dense_hashtable(dense_hashtable&& ht, size_type min_buckets_wanted)
       : settings(ht.settings),
         key_info(ht.key_info),
         num_deleted(0),
@@ -770,14 +753,16 @@ class dense_hashtable {
         val_info(std::move(ht.val_info)),
         table(NULL) {
     if (!ht.settings.use_empty()) {
-      // If use_empty isn't set, copy_or_move_from will crash, so we do our own copying.
+      // If use_empty isn't set, copy_or_move_from will crash, so we do our own
+      // copying.
       assert(ht.empty());
       num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
       settings.reset_thresholds(bucket_count());
       return;
     }
     settings.reset_thresholds(bucket_count());
-    copy_or_move_from(std::move(ht), min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
+    copy_or_move_from(std::move(ht),
+                      min_buckets_wanted);  // copy_or_move_from() ignores deleted entries
   }
 
   dense_hashtable& operator=(const dense_hashtable& ht) {
@@ -797,7 +782,7 @@ class dense_hashtable {
   }
 
   dense_hashtable& operator=(dense_hashtable&& ht) {
-    assert(&ht != this); // this should not happen
+    assert(&ht != this);  // this should not happen
     swap(ht);
     return *this;
   }
@@ -922,8 +907,7 @@ class dense_hashtable {
     if (pos.first == ILLEGAL_BUCKET)  // alas, not there
       return end();
     else
-      return const_iterator(this, table + pos.first, table + num_buckets,
-                            false);
+      return const_iterator(this, table + pos.first, table + num_buckets, false);
   }
 
   // This is a tr1 method: the bucket a given key is in, or what bucket
@@ -952,8 +936,7 @@ class dense_hashtable {
     }
   }
   template <typename K>
-  std::pair<const_iterator, const_iterator> equal_range(
-      const K& key) const {
+  std::pair<const_iterator, const_iterator> equal_range(const K& key) const {
     const_iterator pos = find(key);  // either an iterator or end
     if (pos == end()) {
       return std::pair<const_iterator, const_iterator>(pos, pos);
@@ -989,8 +972,10 @@ class dense_hashtable {
   std::pair<iterator, bool> insert_noresize(K&& key, Args&&... args) {
     // First, double-check we're not inserting delkey or emptyval
     assert(settings.use_empty() && "Inserting without empty key");
-    assert(!equals(std::forward<K>(key), key_info.empty_key) && "Inserting the empty key");
-    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) && "Inserting the deleted key");
+    assert(!equals(std::forward<K>(key), key_info.empty_key) &&
+           "Inserting the empty key");
+    assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
+           "Inserting the deleted key");
 
     const std::pair<size_type, size_type> pos = find_position(key);
     if (pos.first != ILLEGAL_BUCKET) {  // object was already there
@@ -998,7 +983,8 @@ class dense_hashtable {
           iterator(this, table + pos.first, table + num_buckets, false),
           false);  // false: we didn't insert
     } else {       // pos.second says where to put it
-      return std::pair<iterator, bool>(insert_at(pos.second, std::forward<Args>(args)...), true);
+      return std::pair<iterator, bool>(insert_at(pos.second, std::forward<Args>(args)...),
+                                       true);
     }
   }
 
@@ -1033,11 +1019,14 @@ class dense_hashtable {
   template <typename K, typename... Args>
   std::pair<iterator, bool> emplace(K&& key, Args&&... args) {
     resize_delta(1);
-    // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself
-    return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...);
+    // here we push key twice as we need it once for the indexing, and the rest
+    // of the params are for the emplace itself
+    return insert_noresize(std::forward<K>(key), std::forward<K>(key),
+                           std::forward<Args>(args)...);
   }
 
-  /* Overload for maps: Here, K != V, and we need to pass hint->first to the equal() function. */
+  /* Overload for maps: Here, K != V, and we need to pass hint->first to the
+   * equal() function. */
   template <typename K, typename... Args, typename KeyCopy = Key>
   typename std::enable_if<!std::is_same<KeyCopy, Value>::value,
                           std::pair<iterator, bool>>::type
@@ -1045,14 +1034,19 @@ class dense_hashtable {
     resize_delta(1);
 
     if ((hint != this->end()) && (equals(key, hint->first))) {
-        return {iterator(this, const_cast<pointer>(hint.pos), const_cast<pointer>(hint.end), false), false};
+      return {iterator(this, const_cast<pointer>(hint.pos), const_cast<pointer>(hint.end),
+                       false),
+              false};
     }
 
-    // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself
-    return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...);
+    // here we push key twice as we need it once for the indexing, and the rest
+    // of the params are for the emplace itself
+    return insert_noresize(std::forward<K>(key), std::forward<K>(key),
+                           std::forward<Args>(args)...);
   }
 
-  /* Overload for sets: Here, K == V, and we need to pass *hint to the equal() function. */
+  /* Overload for sets: Here, K == V, and we need to pass *hint to the equal()
+   * function. */
   template <typename K, typename... Args, typename KeyCopy = Key>
   typename std::enable_if<std::is_same<KeyCopy, Value>::value,
                           std::pair<iterator, bool>>::type
@@ -1060,19 +1054,22 @@ class dense_hashtable {
     resize_delta(1);
 
     if ((hint != this->end()) && (equals(key, *hint))) {
-      return {iterator(this, const_cast<pointer>(hint.pos), const_cast<pointer>(hint.end), false), false};
+      return {iterator(this, const_cast<pointer>(hint.pos), const_cast<pointer>(hint.end),
+                       false),
+              false};
     }
 
-    // here we push key twice as we need it once for the indexing, and the rest of the params are for the emplace itself
-    return insert_noresize(std::forward<K>(key), std::forward<K>(key), std::forward<Args>(args)...);
+    // here we push key twice as we need it once for the indexing, and the rest
+    // of the params are for the emplace itself
+    return insert_noresize(std::forward<K>(key), std::forward<K>(key),
+                           std::forward<Args>(args)...);
   }
 
   // When inserting a lot at a time, we specialize on the type of iterator
   template <class InputIterator>
   void insert(InputIterator f, InputIterator l) {
     // specializes on iterator type
-    insert(f, l,
-           typename std::iterator_traits<InputIterator>::iterator_category());
+    insert(f, l, typename std::iterator_traits<InputIterator>::iterator_category());
   }
 
   // DefaultValue is a functor that takes a key and returns a value_type
@@ -1080,9 +1077,8 @@ class dense_hashtable {
   template <class T, class K>
   value_type& find_or_insert(K&& key) {
     // First, double-check we're not inserting emptykey or delkey
-    assert(
-        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
-        "Inserting the empty key");
+    assert((!settings.use_empty() || !equals(key, key_info.empty_key)) &&
+           "Inserting the empty key");
     assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
            "Inserting the deleted key");
     const std::pair<size_type, size_type> pos = find_position(key);
@@ -1099,9 +1095,8 @@ class dense_hashtable {
   // DELETION ROUTINES
   size_type erase(const key_type& key) {
     // First, double-check we're not trying to erase delkey or emptyval.
-    assert(
-        (!settings.use_empty() || !equals(key, key_info.empty_key)) &&
-        "Erasing the empty key");
+    assert((!settings.use_empty() || !equals(key, key_info.empty_key)) &&
+           "Erasing the empty key");
     assert((!settings.use_deleted() || !equals(key, key_info.delkey)) &&
            "Erasing the deleted key");
     const_iterator pos = find(key);  // shrug: shouldn't need to be const
@@ -1109,9 +1104,8 @@ class dense_hashtable {
       assert(!test_deleted(pos));  // or find() shouldn't have returned it
       set_deleted(pos);
       ++num_deleted;
-      settings.set_consider_shrink(
-          true);  // will think about shrink after next insert
-      return 1;   // because we deleted one thing
+      settings.set_consider_shrink(true);  // will think about shrink after next insert
+      return 1;                            // because we deleted one thing
     } else {
       return 0;  // because we deleted nothing
     }
@@ -1120,12 +1114,12 @@ class dense_hashtable {
   // We return the iterator past the deleted item.
   iterator erase(const_iterator pos) {
     if (pos == end()) return end();  // sanity check
-    if (set_deleted(pos)) {    // true if object has been newly deleted
+    if (set_deleted(pos)) {          // true if object has been newly deleted
       ++num_deleted;
-      settings.set_consider_shrink(
-          true);  // will think about shrink after next insert
+      settings.set_consider_shrink(true);  // will think about shrink after next insert
     }
-    return iterator(this, const_cast<pointer>(pos.pos), const_cast<pointer>(pos.end), true);
+    return iterator(this, const_cast<pointer>(pos.pos), const_cast<pointer>(pos.end),
+                    true);
   }
 
   iterator erase(const_iterator f, const_iterator l) {
@@ -1133,8 +1127,7 @@ class dense_hashtable {
       if (set_deleted(f))  // should always be true
         ++num_deleted;
     }
-    settings.set_consider_shrink(
-        true);  // will think about shrink after next insert
+    settings.set_consider_shrink(true);  // will think about shrink after next insert
     return iterator(this, const_cast<pointer>(f.pos), const_cast<pointer>(f.end), false);
   }
 
@@ -1182,20 +1175,16 @@ class dense_hashtable {
   template <typename ValueSerializer, typename OUTPUT>
   bool serialize(ValueSerializer serializer, OUTPUT* fp) {
     squash_deleted();  // so we don't have to worry about delkey
-    if (!sparsehash_internal::write_bigendian_number(fp, MAGIC_NUMBER, 4))
-      return false;
-    if (!sparsehash_internal::write_bigendian_number(fp, num_buckets, 8))
-      return false;
-    if (!sparsehash_internal::write_bigendian_number(fp, num_elements, 8))
-      return false;
+    if (!sparsehash_internal::write_bigendian_number(fp, MAGIC_NUMBER, 4)) return false;
+    if (!sparsehash_internal::write_bigendian_number(fp, num_buckets, 8)) return false;
+    if (!sparsehash_internal::write_bigendian_number(fp, num_elements, 8)) return false;
     // Now write a bitmap of non-empty buckets.
     for (size_type i = 0; i < num_buckets; i += 8) {
       unsigned char bits = 0;
       for (int bit = 0; bit < 8; ++bit) {
         if (i + bit < num_buckets && !test_empty(i + bit)) bits |= (1 << bit);
       }
-      if (!sparsehash_internal::write_data(fp, &bits, sizeof(bits)))
-        return false;
+      if (!sparsehash_internal::write_data(fp, &bits, sizeof(bits))) return false;
       for (int bit = 0; bit < 8; ++bit) {
         if (bits & (1 << bit)) {
           if (!serializer(fp, table[i + bit])) return false;
@@ -1213,8 +1202,7 @@ class dense_hashtable {
 
     clear();  // just to be consistent
     MagicNumberType magic_read;
-    if (!sparsehash_internal::read_bigendian_number(fp, &magic_read, 4))
-      return false;
+    if (!sparsehash_internal::read_bigendian_number(fp, &magic_read, 4)) return false;
     if (magic_read != MAGIC_NUMBER) {
       return false;
     }
@@ -1222,14 +1210,12 @@ class dense_hashtable {
     if (!sparsehash_internal::read_bigendian_number(fp, &new_num_buckets, 8))
       return false;
     clear_to_size(new_num_buckets);
-    if (!sparsehash_internal::read_bigendian_number(fp, &num_elements, 8))
-      return false;
+    if (!sparsehash_internal::read_bigendian_number(fp, &num_elements, 8)) return false;
 
     // Read the bitmap of non-empty buckets.
     for (size_type i = 0; i < num_buckets; i += 8) {
       unsigned char bits;
-      if (!sparsehash_internal::read_data(fp, &bits, sizeof(bits)))
-        return false;
+      if (!sparsehash_internal::read_data(fp, &bits, sizeof(bits))) return false;
       for (int bit = 0; bit < 8; ++bit) {
         if (i + bit < num_buckets && (bits & (1 << bit))) {  // not empty
           if (!serializer(fp, &table[i + bit])) return false;
@@ -1293,8 +1279,7 @@ class dense_hashtable {
    public:
     typedef typename alloc_impl<value_alloc_type>::value_type value_type;
 
-    ValInfo(const alloc_impl<value_alloc_type>& a)
-        : alloc_impl<value_alloc_type>(a) {}
+    ValInfo(const alloc_impl<value_alloc_type>& a) : alloc_impl<value_alloc_type>(a) {}
   };
 
   // Package functors with another class to eliminate memory needed for
@@ -1305,8 +1290,8 @@ class dense_hashtable {
       : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
                                                    HT_MIN_BUCKETS> {
     explicit Settings(const hasher& hf)
-        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
-                                                     size_type, HT_MIN_BUCKETS>(
+        : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
+                                                     HT_MIN_BUCKETS>(
               hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
   };
 
@@ -1321,9 +1306,7 @@ class dense_hashtable {
     typename ExtractKey::result_type get_key(V&& v) const {
       return ExtractKey::operator()(std::forward<V>(v));
     }
-    void set_key(pointer v, const key_type& k) const {
-      SetKey::operator()(v, k);
-    }
+    void set_key(pointer v, const key_type& k) const { SetKey::operator()(v, k); }
     void construct_key(pointer v, const key_type& k) const {
       SetKey::operator()(v, k, true);
     }
@@ -1340,7 +1323,9 @@ class dense_hashtable {
 
   // Utility functions to access the templated operators
   template <typename K>
-  size_type hash(const K& v) const { return settings.hash(v); }
+  size_type hash(const K& v) const {
+    return settings.hash(v);
+  }
   template <typename K1, typename K2>
   bool equals(const K1& a, const K2& b) const {
     return key_info.equals(a, b);
@@ -1389,8 +1374,7 @@ const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 50;
 // How empty we let the table get before we resize lower.
 // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing.
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
-const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT =
-    static_cast<int>(
-        0.4 * dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
+const int dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT = static_cast<int>(
+    0.4 * dense_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
 
 }  // namespace google
diff --git a/native-sql-engine/cpp/src/third_party/sparsehash/internal/hashtable-common.h b/native-sql-engine/cpp/src/third_party/sparsehash/internal/hashtable-common.h
index 0bcf042f9..e7715e5c2 100644
--- a/native-sql-engine/cpp/src/third_party/sparsehash/internal/hashtable-common.h
+++ b/native-sql-engine/cpp/src/third_party/sparsehash/internal/hashtable-common.h
@@ -56,28 +56,36 @@
 #pragma once
 
 #include <cassert>
-#include <cstdio>
 #include <cstddef>  // for size_t
+#include <cstdio>
 #include <iosfwd>
 #include <stdexcept>  // For length_error
 
 namespace google {
 namespace sparsehash_internal {
 
-template<typename... Ts> struct make_void { typedef void type;};
-template<typename... Ts> using void_t = typename make_void<Ts...>::type;
+template <typename... Ts>
+struct make_void {
+  typedef void type;
+};
+template <typename... Ts>
+using void_t = typename make_void<Ts...>::type;
 
 template <class HashFcn, class = void>
 struct has_is_transparent : std::false_type {};
 
 template <class HashFcn>
-struct has_is_transparent<HashFcn, void_t<typename HashFcn::transparent_key_equal::is_transparent>> : std::true_type {};
+struct has_is_transparent<HashFcn,
+                          void_t<typename HashFcn::transparent_key_equal::is_transparent>>
+    : std::true_type {};
 
 template <class HashFcn, class = void, class = void>
 struct has_transparent_key_equal : std::false_type {};
 
 template <class HashFcn, class Key>
-struct has_transparent_key_equal<HashFcn, Key, void_t<typename HashFcn::transparent_key_equal>> : std::true_type {};
+struct has_transparent_key_equal<HashFcn, Key,
+                                 void_t<typename HashFcn::transparent_key_equal>>
+    : std::true_type {};
 
 template <class HashFcn, class EqualKey, bool = has_transparent_key_equal<HashFcn>::value>
 struct key_equal_chosen {
@@ -123,8 +131,7 @@ inline bool read_data_internal(Ignored*, FILE* fp, void* data, size_t length) {
 }
 
 template <typename Ignored>
-inline bool write_data_internal(Ignored*, FILE* fp, const void* data,
-                                size_t length) {
+inline bool write_data_internal(Ignored*, FILE* fp, const void* data, size_t length) {
   return fwrite(data, length, 1, fp) == 1;
 }
 
@@ -135,13 +142,11 @@ inline bool write_data_internal(Ignored*, FILE* fp, const void* data,
 // it's only legal to delay the instantiation the way we want to if
 // the istream/ostream is a template type.  So we jump through hoops.
 template <typename ISTREAM>
-inline bool read_data_internal_for_istream(ISTREAM* fp, void* data,
-                                           size_t length) {
+inline bool read_data_internal_for_istream(ISTREAM* fp, void* data, size_t length) {
   return fp->read(reinterpret_cast<char*>(data), length).good();
 }
 template <typename Ignored>
-inline bool read_data_internal(Ignored*, std::istream* fp, void* data,
-                               size_t length) {
+inline bool read_data_internal(Ignored*, std::istream* fp, void* data, size_t length) {
   return read_data_internal_for_istream(fp, data, length);
 }
 
@@ -168,8 +173,7 @@ inline bool read_data_internal(INPUT* fp, void*, void* data, size_t length) {
 // The OUTPUT type needs to support a Write() operation that takes
 // a buffer and a length and returns the number of bytes written.
 template <typename OUTPUT>
-inline bool write_data_internal(OUTPUT* fp, void*, const void* data,
-                                size_t length) {
+inline bool write_data_internal(OUTPUT* fp, void*, const void* data, size_t length) {
   return static_cast<size_t>(fp->Write(data, length)) == length;
 }
 
@@ -213,8 +217,7 @@ bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) {
   for (size_t i = 0; i < length; ++i) {
     byte = (sizeof(value) <= length - 1 - i)
                ? 0
-               : static_cast<unsigned char>((value >> ((length - 1 - i) * 8)) &
-                                            255);
+               : static_cast<unsigned char>((value >> ((length - 1 - i) * 8)) & 255);
     if (!write_data(fp, &byte, sizeof(byte))) return false;
   }
   return true;
@@ -250,14 +253,14 @@ struct pod_serializer {
 // for sure that the hash is the identity hash.  If it's not, this
 // is needless work (and possibly, though not likely, harmful).
 
-template <typename Key, typename HashFunc, typename SizeType,
-          int HT_MIN_BUCKETS>
+template <typename Key, typename HashFunc, typename SizeType, int HT_MIN_BUCKETS>
 class sh_hashtable_settings : public HashFunc {
  public:
   typedef Key key_type;
   typedef HashFunc hasher;
   typedef SizeType size_type;
-  static_assert(!has_transparent_key_equal<HashFunc>::value || has_is_transparent<HashFunc, void>::value,
+  static_assert(!has_transparent_key_equal<HashFunc>::value ||
+                    has_is_transparent<HashFunc, void>::value,
                 "hash provided non-transparent key_equal");
 
  public:
@@ -274,7 +277,7 @@ class sh_hashtable_settings : public HashFunc {
     set_shrink_factor(ht_empty_flt);
   }
 
-  template<typename K>
+  template <typename K>
   size_type hash(const K& v) const {
     // We munge the hash value when we don't trust hasher::operator().
     return hash_munger<Key>::MungedHash(hasher::operator()(v));
@@ -306,9 +309,7 @@ class sh_hashtable_settings : public HashFunc {
   bool use_deleted() const { return use_deleted_; }
   void set_use_deleted(bool t) { use_deleted_ = t; }
 
-  size_type num_ht_copies() const {
-    return static_cast<size_type>(num_ht_copies_);
-  }
+  size_type num_ht_copies() const { return static_cast<size_type>(num_ht_copies_); }
   void inc_num_ht_copies() { ++num_ht_copies_; }
 
   // Reset the enlarge and shrink thresholds
@@ -324,8 +325,7 @@ class sh_hashtable_settings : public HashFunc {
   void set_resizing_parameters(float shrink, float grow) {
     assert(shrink >= 0.0);
     assert(grow <= 1.0);
-    if (shrink > grow / 2.0f)
-      shrink = grow / 2.0f;  // otherwise we thrash hashtable size
+    if (shrink > grow / 2.0f) shrink = grow / 2.0f;  // otherwise we thrash hashtable size
     set_shrink_factor(shrink);
     set_enlarge_factor(grow);
   }
@@ -335,8 +335,7 @@ class sh_hashtable_settings : public HashFunc {
   size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) {
     float enlarge = enlarge_factor();
     size_type sz = HT_MIN_BUCKETS;  // min buckets allowed
-    while (sz < min_buckets_wanted ||
-           num_elts >= static_cast<size_type>(sz * enlarge)) {
+    while (sz < min_buckets_wanted || num_elts >= static_cast<size_type>(sz * enlarge)) {
       // This just prevents overflowing size_type, since sz can exceed
       // max_size() here.
       if (static_cast<size_type>(sz * 2) < sz) {
diff --git a/native-sql-engine/cpp/src/third_party/sparsehash/internal/libc_allocator_with_realloc.h b/native-sql-engine/cpp/src/third_party/sparsehash/internal/libc_allocator_with_realloc.h
index 03a7b3eb6..34df690f8 100644
--- a/native-sql-engine/cpp/src/third_party/sparsehash/internal/libc_allocator_with_realloc.h
+++ b/native-sql-engine/cpp/src/third_party/sparsehash/internal/libc_allocator_with_realloc.h
@@ -47,8 +47,8 @@
 
 #pragma once
 
-#include <cstdlib>  // for malloc/realloc/free
 #include <cstddef>  // for ptrdiff_t
+#include <cstdlib>  // for malloc/realloc/free
 #include <new>      // for placement new
 
 namespace google {
@@ -82,9 +82,7 @@ class libc_allocator_with_realloc {
     return static_cast<pointer>(realloc(static_cast<void*>(p), n * sizeof(value_type)));
   }
 
-  size_type max_size() const {
-    return static_cast<size_type>(-1) / sizeof(value_type);
-  }
+  size_type max_size() const { return static_cast<size_type>(-1) / sizeof(value_type); }
 
   void construct(pointer p, const value_type& val) { new (p) value_type(val); }
   void destroy(pointer p) { p->~value_type(); }
diff --git a/native-sql-engine/cpp/src/third_party/sparsehash/internal/sparsehashtable.h b/native-sql-engine/cpp/src/third_party/sparsehash/internal/sparsehashtable.h
index 65f45da55..cf234e5b7 100644
--- a/native-sql-engine/cpp/src/third_party/sparsehash/internal/sparsehashtable.h
+++ b/native-sql-engine/cpp/src/third_party/sparsehash/internal/sparsehashtable.h
@@ -112,14 +112,15 @@
 #pragma once
 
 #include <assert.h>
-#include <algorithm>    // For swap(), eg
-#include <iterator>     // for iterator tags
-#include <limits>       // for numeric_limits
-#include <utility>      // for pair
-#include <type_traits>  // for remove_const
 #include <sparsehash/internal/hashtable-common.h>
+
+#include <algorithm>               // For swap(), eg
+#include <iterator>                // for iterator tags
+#include <limits>                  // for numeric_limits
 #include <sparsehash/sparsetable>  // IWYU pragma: export
 #include <stdexcept>               // For length_error
+#include <type_traits>             // for remove_const
+#include <utility>                 // for pair
 
 namespace google {
 
@@ -171,15 +172,13 @@ struct sparse_hashtable_const_iterator;
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 struct sparse_hashtable_iterator {
  private:
-  using value_alloc_type =
-      typename std::allocator_traits<A>::template rebind_alloc<V>;
+  using value_alloc_type = typename std::allocator_traits<A>::template rebind_alloc<V>;
 
  public:
   typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
-  typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
-      const_iterator;
-  typedef typename sparsetable<V, DEFAULT_GROUP_SIZE,
-                               value_alloc_type>::nonempty_iterator st_iterator;
+  typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> const_iterator;
+  typedef typename sparsetable<V, DEFAULT_GROUP_SIZE, value_alloc_type>::nonempty_iterator
+      st_iterator;
 
   typedef std::forward_iterator_tag iterator_category;  // very little defined!
   typedef V value_type;
@@ -189,9 +188,8 @@ struct sparse_hashtable_iterator {
   typedef typename value_alloc_type::pointer pointer;
 
   // "Real" constructor and default constructor
-  sparse_hashtable_iterator(
-      const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it,
-      st_iterator it_end)
+  sparse_hashtable_iterator(const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h,
+                            st_iterator it, st_iterator it_end)
       : ht(h), pos(it), end(it_end) {
     advance_past_deleted();
   }
@@ -233,16 +231,13 @@ struct sparse_hashtable_iterator {
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 struct sparse_hashtable_const_iterator {
  private:
-  using value_alloc_type =
-      typename std::allocator_traits<A>::template rebind_alloc<V>;
+  using value_alloc_type = typename std::allocator_traits<A>::template rebind_alloc<V>;
 
  public:
   typedef sparse_hashtable_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
-  typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A>
-      const_iterator;
+  typedef sparse_hashtable_const_iterator<V, K, HF, ExK, SetK, EqK, A> const_iterator;
   typedef typename sparsetable<V, DEFAULT_GROUP_SIZE,
-                               value_alloc_type>::const_nonempty_iterator
-      st_iterator;
+                               value_alloc_type>::const_nonempty_iterator st_iterator;
 
   typedef std::forward_iterator_tag iterator_category;  // very little defined!
   typedef V value_type;
@@ -252,9 +247,8 @@ struct sparse_hashtable_const_iterator {
   typedef typename value_alloc_type::const_pointer pointer;
 
   // "Real" constructor and default constructor
-  sparse_hashtable_const_iterator(
-      const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h, st_iterator it,
-      st_iterator it_end)
+  sparse_hashtable_const_iterator(const sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>* h,
+                                  st_iterator it, st_iterator it_end)
       : ht(h), pos(it), end(it_end) {
     advance_past_deleted();
   }
@@ -299,15 +293,13 @@ struct sparse_hashtable_const_iterator {
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 struct sparse_hashtable_destructive_iterator {
  private:
-  using value_alloc_type =
-      typename std::allocator_traits<A>::template rebind_alloc<V>;
+  using value_alloc_type = typename std::allocator_traits<A>::template rebind_alloc<V>;
 
  public:
-  typedef sparse_hashtable_destructive_iterator<V, K, HF, ExK, SetK, EqK, A>
-      iterator;
+  typedef sparse_hashtable_destructive_iterator<V, K, HF, ExK, SetK, EqK, A> iterator;
   typedef
-      typename sparsetable<V, DEFAULT_GROUP_SIZE,
-                           value_alloc_type>::destructive_iterator st_iterator;
+      typename sparsetable<V, DEFAULT_GROUP_SIZE, value_alloc_type>::destructive_iterator
+          st_iterator;
 
   typedef std::forward_iterator_tag iterator_category;  // very little defined!
   typedef V value_type;
@@ -377,15 +369,17 @@ class sparse_hashtable {
   typedef typename value_alloc_type::const_reference const_reference;
   typedef typename value_alloc_type::pointer pointer;
   typedef typename value_alloc_type::const_pointer const_pointer;
-  typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
-                                    EqualKey, Alloc> iterator;
+  typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey, SetKey, EqualKey,
+                                    Alloc>
+      iterator;
 
-  typedef sparse_hashtable_const_iterator<
-      Value, Key, HashFcn, ExtractKey, SetKey, EqualKey, Alloc> const_iterator;
+  typedef sparse_hashtable_const_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
+                                          EqualKey, Alloc>
+      const_iterator;
 
-  typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn, ExtractKey,
-                                                SetKey, EqualKey,
-                                                Alloc> destructive_iterator;
+  typedef sparse_hashtable_destructive_iterator<Value, Key, HashFcn, ExtractKey, SetKey,
+                                                EqualKey, Alloc>
+      destructive_iterator;
 
   // These come from tr1.  For us they're the same as regular iterators.
   typedef iterator local_iterator;
@@ -416,9 +410,7 @@ class sparse_hashtable {
   iterator begin() {
     return iterator(this, table.nonempty_begin(), table.nonempty_end());
   }
-  iterator end() {
-    return iterator(this, table.nonempty_end(), table.nonempty_end());
-  }
+  iterator end() { return iterator(this, table.nonempty_end(), table.nonempty_end()); }
   const_iterator begin() const {
     return const_iterator(this, table.nonempty_begin(), table.nonempty_end());
   }
@@ -444,11 +436,9 @@ class sparse_hashtable {
   }
   const_local_iterator begin(size_type i) const {
     if (table.test(i))
-      return const_local_iterator(this, table.get_iter(i),
-                                  table.nonempty_end());
+      return const_local_iterator(this, table.get_iter(i), table.nonempty_end());
     else
-      return const_local_iterator(this, table.nonempty_end(),
-                                  table.nonempty_end());
+      return const_local_iterator(this, table.nonempty_end(), table.nonempty_end());
   }
   const_local_iterator end(size_type i) const {
     const_local_iterator it = begin(i);
@@ -458,12 +448,10 @@ class sparse_hashtable {
 
   // This is used when resizing
   destructive_iterator destructive_begin() {
-    return destructive_iterator(this, table.destructive_begin(),
-                                table.destructive_end());
+    return destructive_iterator(this, table.destructive_begin(), table.destructive_end());
   }
   destructive_iterator destructive_end() {
-    return destructive_iterator(this, table.destructive_end(),
-                                table.destructive_end());
+    return destructive_iterator(this, table.destructive_end(), table.destructive_end());
   }
 
   // ACCESSOR FUNCTIONS for the things we templatize on, basically
@@ -527,8 +515,7 @@ class sparse_hashtable {
     settings.set_use_deleted(false);
   }
   key_type deleted_key() const {
-    assert(settings.use_deleted() &&
-           "Must set deleted key before calling deleted_key");
+    assert(settings.use_deleted() && "Must set deleted key before calling deleted_key");
     return key_info.delkey;
   }
 
@@ -605,9 +592,7 @@ class sparse_hashtable {
   size_type max_bucket_count() const { return max_size(); }
   // These are tr1 methods.  Their idea of 'bucket' doesn't map well to
   // what we do.  We just say every bucket has 0 or 1 items in it.
-  size_type bucket_size(size_type i) const {
-    return begin(i) == end(i) ? 0 : 1;
-  }
+  size_type bucket_size(size_type i) const { return begin(i) == end(i) ? 0 : 1; }
 
  private:
   // Because of the above, size_type(-1) is never legal; use it for errors
@@ -654,8 +639,7 @@ class sparse_hashtable {
     if (settings.consider_shrink()) {  // see if lots of deletes happened
       if (maybe_shrink()) did_resize = true;
     }
-    if (table.num_nonempty() >=
-        (std::numeric_limits<size_type>::max)() - delta) {
+    if (table.num_nonempty() >= (std::numeric_limits<size_type>::max)() - delta) {
       throw std::length_error("resize overflow");
     }
     if (bucket_count() >= HT_MIN_BUCKETS &&
@@ -668,13 +652,12 @@ class sparse_hashtable {
     // are currently taking up room).  But later, when we decide what
     // size to resize to, *don't* count deleted buckets, since they
     // get discarded during the resize.
-    const size_type needed_size =
-        settings.min_buckets(table.num_nonempty() + delta, 0);
+    const size_type needed_size = settings.min_buckets(table.num_nonempty() + delta, 0);
     if (needed_size <= bucket_count())  // we have enough buckets
       return did_resize;
 
-    size_type resize_to = settings.min_buckets(
-        table.num_nonempty() - num_deleted + delta, bucket_count());
+    size_type resize_to =
+        settings.min_buckets(table.num_nonempty() - num_deleted + delta, bucket_count());
     if (resize_to < needed_size &&  // may double resize_to
         resize_to < (std::numeric_limits<size_type>::max)() / 2) {
       // This situation means that we have enough deleted elements,
@@ -703,8 +686,7 @@ class sparse_hashtable {
     clear();  // clear table, set num_deleted to 0
 
     // If we need to change the size of our table, do it now
-    const size_type resize_to =
-        settings.min_buckets(ht.size(), min_buckets_wanted);
+    const size_type resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
     if (resize_to > bucket_count()) {  // we don't have enough buckets
       table.resize(resize_to);         // sets the number of buckets
       settings.reset_thresholds(bucket_count());
@@ -720,8 +702,7 @@ class sparse_hashtable {
       const size_type bucket_count_minus_one = bucket_count() - 1;
       for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
            table.test(bucknum);  // not empty
-           bucknum =
-               (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
+           bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
         ++num_probes;
         assert(num_probes < bucket_count() &&
                "Hashtable is full: an error in key_equal<> or hash<>");
@@ -754,14 +735,13 @@ class sparse_hashtable {
     // no duplicates and no deleted items, we can be more efficient
     assert((bucket_count() & (bucket_count() - 1)) == 0);  // a power of two
     // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
-    for (destructive_iterator it = ht.destructive_begin();
-         it != ht.destructive_end(); ++it) {
+    for (destructive_iterator it = ht.destructive_begin(); it != ht.destructive_end();
+         ++it) {
       size_type num_probes = 0;  // how many times we've probed
       size_type bucknum;
       for (bucknum = hash(get_key(*it)) & (bucket_count() - 1);  // h % buck_cnt
            table.test(bucknum);                                  // not empty
-           bucknum =
-               (bucknum + JUMP_(key, num_probes)) & (bucket_count() - 1)) {
+           bucknum = (bucknum + JUMP_(key, num_probes)) & (bucket_count() - 1)) {
         ++num_probes;
         assert(num_probes < bucket_count() &&
                "Hashtable is full: an error in key_equal<> or hash<>");
@@ -803,8 +783,7 @@ class sparse_hashtable {
                             const HashFcn& hf = HashFcn(),
                             const EqualKey& eql = EqualKey(),
                             const ExtractKey& ext = ExtractKey(),
-                            const SetKey& set = SetKey(),
-                            const Alloc& alloc = Alloc())
+                            const SetKey& set = SetKey(), const Alloc& alloc = Alloc())
       : settings(hf),
         key_info(ext, set, eql),
         num_deleted(0),
@@ -921,8 +900,7 @@ class sparse_hashtable {
     if (pos.first == ILLEGAL_BUCKET)  // alas, not there
       return end();
     else
-      return const_iterator(this, table.get_iter(pos.first),
-                            table.nonempty_end());
+      return const_iterator(this, table.get_iter(pos.first), table.nonempty_end());
   }
 
   // This is a tr1 method: the bucket a given key is in, or what bucket
@@ -951,8 +929,7 @@ class sparse_hashtable {
     }
   }
   template <typename K>
-  std::pair<const_iterator, const_iterator> equal_range(
-      const K& key) const {
+  std::pair<const_iterator, const_iterator> equal_range(const K& key) const {
     const_iterator pos = find(key);  // either an iterator or end
     if (pos == end()) {
       return std::pair<const_iterator, const_iterator>(pos, pos);
@@ -982,9 +959,8 @@ class sparse_hashtable {
   // If you know *this is big enough to hold obj, use this routine
   std::pair<iterator, bool> insert_noresize(const_reference obj) {
     // First, double-check we're not inserting delkey
-    assert(
-        (!settings.use_deleted() || !equals(get_key(obj), key_info.delkey)) &&
-        "Inserting the deleted key");
+    assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey)) &&
+           "Inserting the deleted key");
     const std::pair<size_type, size_type> pos = find_position(get_key(obj));
     if (pos.first != ILLEGAL_BUCKET) {  // object was already there
       return std::pair<iterator, bool>(
@@ -1026,8 +1002,7 @@ class sparse_hashtable {
   template <class InputIterator>
   void insert(InputIterator f, InputIterator l) {
     // specializes on iterator type
-    insert(f, l,
-           typename std::iterator_traits<InputIterator>::iterator_category());
+    insert(f, l, typename std::iterator_traits<InputIterator>::iterator_category());
   }
 
   // DefaultValue is a functor that takes a key and returns a value_type
@@ -1204,8 +1179,8 @@ class sparse_hashtable {
       : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
                                                    HT_MIN_BUCKETS> {
     explicit Settings(const hasher& hf)
-        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
-                                                     size_type, HT_MIN_BUCKETS>(
+        : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
+                                                     HT_MIN_BUCKETS>(
               hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
   };
 
@@ -1220,9 +1195,7 @@ class sparse_hashtable {
     typename ExtractKey::result_type get_key(const_reference v) const {
       return ExtractKey::operator()(v);
     }
-    void set_key(pointer v, const key_type& k) const {
-      SetKey::operator()(v, k);
-    }
+    void set_key(pointer v, const key_type& k) const { SetKey::operator()(v, k); }
     template <typename K1, typename K2>
     bool equals(const K1& a, const K2& b) const {
       return EqualKey::operator()(a, b);
@@ -1236,7 +1209,9 @@ class sparse_hashtable {
 
   // Utility functions to access the templated operators
   template <typename K>
-  size_type hash(const K& v) const { return settings.hash(v); }
+  size_type hash(const K& v) const {
+    return settings.hash(v);
+  }
   template <typename K1, typename K2>
   bool equals(const K1& a, const K2& b) const {
     return key_info.equals(a, b);
@@ -1275,7 +1250,6 @@ const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT = 80;
 // How empty we let the table get before we resize lower.
 // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
-const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT =
-    static_cast<int>(
-        0.4 * sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
-}
+const int sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_EMPTY_PCT = static_cast<int>(
+    0.4 * sparse_hashtable<V, K, HF, ExK, SetK, EqK, A>::HT_OCCUPANCY_PCT);
+}  // namespace google
diff --git a/native-sql-engine/cpp/src/third_party/sparsehash/sparse_hash_map.h b/native-sql-engine/cpp/src/third_party/sparsehash/sparse_hash_map.h
index ff763feeb..885b5840a 100644
--- a/native-sql-engine/cpp/src/third_party/sparsehash/sparse_hash_map.h
+++ b/native-sql-engine/cpp/src/third_party/sparsehash/sparse_hash_map.h
@@ -14,6 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+#pragma once
 #include <arrow/memory_pool.h>
 #include <arrow/status.h>
 
diff --git a/native-sql-engine/cpp/src/third_party/timsort.hpp b/native-sql-engine/cpp/src/third_party/timsort.hpp
index fa2b4625c..df8b94133 100644
--- a/native-sql-engine/cpp/src/third_party/timsort.hpp
+++ b/native-sql-engine/cpp/src/third_party/timsort.hpp
@@ -3,7 +3,8 @@
  *
  * ported from Python's and OpenJDK's:
  * - http://svn.python.org/projects/python/trunk/Objects/listobject.c
- * - http://cr.openjdk.java.net/~martin/webrevs/openjdk7/timsort/raw_files/new/src/share/classes/java/util/TimSort.java
+ * -
+ * http://cr.openjdk.java.net/~martin/webrevs/openjdk7/timsort/raw_files/new/src/share/classes/java/util/TimSort.java
  *
  * Copyright (c) 2011 Fuji, Goro (gfx) <gfuji@cpan.org>.
  * Copyright (c) 2019-2020 Morwenn.
@@ -45,20 +46,19 @@
 // Diagnostic selection macros
 
 #ifdef GFX_TIMSORT_ENABLE_ASSERT
-#   include <cassert>
-#   define GFX_TIMSORT_ASSERT(expr) assert(expr)
+#include <cassert>
+#define GFX_TIMSORT_ASSERT(expr) assert(expr)
 #else
-#   define GFX_TIMSORT_ASSERT(expr) ((void)0)
+#define GFX_TIMSORT_ASSERT(expr) ((void)0)
 #endif
 
 #ifdef GFX_TIMSORT_ENABLE_LOG
-#   include <iostream>
-#   define GFX_TIMSORT_LOG(expr) (std::clog << "# " << __func__ << ": " << expr << std::endl)
+#include <iostream>
+#define GFX_TIMSORT_LOG(expr) (std::clog << "# " << __func__ << ": " << expr << std::endl)
 #else
-#   define GFX_TIMSORT_LOG(expr) ((void)0)
+#define GFX_TIMSORT_LOG(expr) ((void)0)
 #endif
 
-
 namespace gfx {
 
 // ---------------------------------------
@@ -69,613 +69,610 @@ namespace detail {
 
 // Equivalent to C++20 std::identity
 struct identity {
-    template <typename T>
-    constexpr T&& operator()(T&& value) const noexcept
-    {
-        return std::forward<T>(value);
-    }
+  template <typename T>
+  constexpr T&& operator()(T&& value) const noexcept {
+    return std::forward<T>(value);
+  }
 };
 
 // Merge a predicate and a projection function
 template <typename Compare, typename Projection>
 struct projection_compare {
-    projection_compare(Compare comp, Projection proj) : compare(comp), projection(proj) {
-    }
+  projection_compare(Compare comp, Projection proj) : compare(comp), projection(proj) {}
 
-    template <typename T, typename U>
-    bool operator()(T &&lhs, U &&rhs) {
+  template <typename T, typename U>
+  bool operator()(T&& lhs, U&& rhs) {
 #ifdef __cpp_lib_invoke
-        return static_cast<bool>(std::invoke(compare,
-            std::invoke(projection, std::forward<T>(lhs)),
-            std::invoke(projection, std::forward<U>(rhs))
-        ));
+    return static_cast<bool>(std::invoke(compare,
+                                         std::invoke(projection, std::forward<T>(lhs)),
+                                         std::invoke(projection, std::forward<U>(rhs))));
 #else
-        return static_cast<bool>(compare(
-            projection(std::forward<T>(lhs)),
-            projection(std::forward<U>(rhs))
-        ));
+    return static_cast<bool>(
+        compare(projection(std::forward<T>(lhs)), projection(std::forward<U>(rhs))));
 #endif
-    }
+  }
 
-    Compare compare;
-    Projection projection;
+  Compare compare;
+  Projection projection;
 };
 
-template <typename Iterator> struct run {
-    typedef typename std::iterator_traits<Iterator>::difference_type diff_t;
+template <typename Iterator>
+struct run {
+  typedef typename std::iterator_traits<Iterator>::difference_type diff_t;
 
-    Iterator base;
-    diff_t len;
+  Iterator base;
+  diff_t len;
 
-    run(Iterator b, diff_t l) : base(b), len(l) {
-    }
+  run(Iterator b, diff_t l) : base(b), len(l) {}
 };
 
-template <typename RandomAccessIterator, typename Compare> class TimSort {
-    typedef RandomAccessIterator iter_t;
-    typedef typename std::iterator_traits<iter_t>::value_type value_t;
-    typedef typename std::iterator_traits<iter_t>::reference ref_t;
-    typedef typename std::iterator_traits<iter_t>::difference_type diff_t;
+template <typename RandomAccessIterator, typename Compare>
+class TimSort {
+  typedef RandomAccessIterator iter_t;
+  typedef typename std::iterator_traits<iter_t>::value_type value_t;
+  typedef typename std::iterator_traits<iter_t>::reference ref_t;
+  typedef typename std::iterator_traits<iter_t>::difference_type diff_t;
 
-    static const int MIN_MERGE = 32;
-    static const int MIN_GALLOP = 7;
+  static const int MIN_MERGE = 32;
+  static const int MIN_GALLOP = 7;
 
-    int minGallop_; // default to MIN_GALLOP
+  int minGallop_;  // default to MIN_GALLOP
 
-    std::vector<value_t> tmp_; // temp storage for merges
-    typedef typename std::vector<value_t>::iterator tmp_iter_t;
+  std::vector<value_t> tmp_;  // temp storage for merges
+  typedef typename std::vector<value_t>::iterator tmp_iter_t;
 
-    std::vector<run<RandomAccessIterator> > pending_;
+  std::vector<run<RandomAccessIterator> > pending_;
 
-    static void binarySort(iter_t const lo, iter_t const hi, iter_t start, Compare compare) {
-        GFX_TIMSORT_ASSERT(lo <= start);
-        GFX_TIMSORT_ASSERT(start <= hi);
-        if (start == lo) {
-            ++start;
-        }
-        for (; start < hi; ++start) {
-            GFX_TIMSORT_ASSERT(lo <= start);
-            value_t pivot = std::move(*start);
-
-            iter_t const pos = std::upper_bound(lo, start, pivot, compare);
-            for (iter_t p = start; p > pos; --p) {
-                *p = std::move(*(p - 1));
-            }
-            *pos = std::move(pivot);
-        }
+  static void binarySort(iter_t const lo, iter_t const hi, iter_t start,
+                         Compare compare) {
+    GFX_TIMSORT_ASSERT(lo <= start);
+    GFX_TIMSORT_ASSERT(start <= hi);
+    if (start == lo) {
+      ++start;
     }
+    for (; start < hi; ++start) {
+      GFX_TIMSORT_ASSERT(lo <= start);
+      value_t pivot = std::move(*start);
+
+      iter_t const pos = std::upper_bound(lo, start, pivot, compare);
+      for (iter_t p = start; p > pos; --p) {
+        *p = std::move(*(p - 1));
+      }
+      *pos = std::move(pivot);
+    }
+  }
 
-    static diff_t countRunAndMakeAscending(iter_t const lo, iter_t const hi, Compare compare) {
-        GFX_TIMSORT_ASSERT(lo < hi);
-
-        iter_t runHi = lo + 1;
-        if (runHi == hi) {
-            return 1;
-        }
+  static diff_t countRunAndMakeAscending(iter_t const lo, iter_t const hi,
+                                         Compare compare) {
+    GFX_TIMSORT_ASSERT(lo < hi);
 
-        if (compare(*runHi, *lo)) { // decreasing
-            do {
-                ++runHi;
-            } while (runHi < hi && compare(*runHi, *(runHi - 1)));
-            std::reverse(lo, runHi);
-        } else { // non-decreasing
-            do {
-                ++runHi;
-            } while (runHi < hi && !compare(*runHi, *(runHi - 1)));
-        }
+    iter_t runHi = lo + 1;
+    if (runHi == hi) {
+      return 1;
+    }
 
-        return runHi - lo;
+    if (compare(*runHi, *lo)) {  // decreasing
+      do {
+        ++runHi;
+      } while (runHi < hi && compare(*runHi, *(runHi - 1)));
+      std::reverse(lo, runHi);
+    } else {  // non-decreasing
+      do {
+        ++runHi;
+      } while (runHi < hi && !compare(*runHi, *(runHi - 1)));
     }
 
-    static diff_t minRunLength(diff_t n) {
-        GFX_TIMSORT_ASSERT(n >= 0);
+    return runHi - lo;
+  }
 
-        diff_t r = 0;
-        while (n >= 2 * MIN_MERGE) {
-            r |= (n & 1);
-            n >>= 1;
-        }
-        return n + r;
-    }
+  static diff_t minRunLength(diff_t n) {
+    GFX_TIMSORT_ASSERT(n >= 0);
 
-    TimSort() : minGallop_(MIN_GALLOP) {
+    diff_t r = 0;
+    while (n >= 2 * MIN_MERGE) {
+      r |= (n & 1);
+      n >>= 1;
     }
+    return n + r;
+  }
 
-    // Silence GCC -Winline warning
-    ~TimSort() {}
+  TimSort() : minGallop_(MIN_GALLOP) {}
 
-    void pushRun(iter_t const runBase, diff_t const runLen) {
-        pending_.push_back(run<iter_t>(runBase, runLen));
-    }
+  // Silence GCC -Winline warning
+  ~TimSort() {}
+
+  void pushRun(iter_t const runBase, diff_t const runLen) {
+    pending_.push_back(run<iter_t>(runBase, runLen));
+  }
+
+  void mergeCollapse(Compare compare) {
+    while (pending_.size() > 1) {
+      diff_t n = pending_.size() - 2;
 
-    void mergeCollapse(Compare compare) {
-        while (pending_.size() > 1) {
-            diff_t n = pending_.size() - 2;
-
-            if ((n > 0 && pending_[n - 1].len <= pending_[n].len + pending_[n + 1].len) ||
-                (n > 1 && pending_[n - 2].len <= pending_[n - 1].len + pending_[n].len)) {
-                if (pending_[n - 1].len < pending_[n + 1].len) {
-                    --n;
-                }
-                mergeAt(n, compare);
-            } else if (pending_[n].len <= pending_[n + 1].len) {
-                mergeAt(n, compare);
-            } else {
-                break;
-            }
+      if ((n > 0 && pending_[n - 1].len <= pending_[n].len + pending_[n + 1].len) ||
+          (n > 1 && pending_[n - 2].len <= pending_[n - 1].len + pending_[n].len)) {
+        if (pending_[n - 1].len < pending_[n + 1].len) {
+          --n;
         }
+        mergeAt(n, compare);
+      } else if (pending_[n].len <= pending_[n + 1].len) {
+        mergeAt(n, compare);
+      } else {
+        break;
+      }
     }
+  }
 
-    void mergeForceCollapse(Compare compare) {
-        while (pending_.size() > 1) {
-            diff_t n = pending_.size() - 2;
+  void mergeForceCollapse(Compare compare) {
+    while (pending_.size() > 1) {
+      diff_t n = pending_.size() - 2;
 
-            if (n > 0 && pending_[n - 1].len < pending_[n + 1].len) {
-                --n;
-            }
-            mergeAt(n, compare);
-        }
+      if (n > 0 && pending_[n - 1].len < pending_[n + 1].len) {
+        --n;
+      }
+      mergeAt(n, compare);
     }
+  }
 
-    void mergeAt(diff_t const i, Compare compare) {
-        diff_t const stackSize = pending_.size();
-        GFX_TIMSORT_ASSERT(stackSize >= 2);
-        GFX_TIMSORT_ASSERT(i >= 0);
-        GFX_TIMSORT_ASSERT(i == stackSize - 2 || i == stackSize - 3);
+  void mergeAt(diff_t const i, Compare compare) {
+    diff_t const stackSize = pending_.size();
+    GFX_TIMSORT_ASSERT(stackSize >= 2);
+    GFX_TIMSORT_ASSERT(i >= 0);
+    GFX_TIMSORT_ASSERT(i == stackSize - 2 || i == stackSize - 3);
 
-        iter_t base1 = pending_[i].base;
-        diff_t len1 = pending_[i].len;
-        iter_t base2 = pending_[i + 1].base;
-        diff_t len2 = pending_[i + 1].len;
+    iter_t base1 = pending_[i].base;
+    diff_t len1 = pending_[i].len;
+    iter_t base2 = pending_[i + 1].base;
+    diff_t len2 = pending_[i + 1].len;
 
-        GFX_TIMSORT_ASSERT(len1 > 0);
-        GFX_TIMSORT_ASSERT(len2 > 0);
-        GFX_TIMSORT_ASSERT(base1 + len1 == base2);
+    GFX_TIMSORT_ASSERT(len1 > 0);
+    GFX_TIMSORT_ASSERT(len2 > 0);
+    GFX_TIMSORT_ASSERT(base1 + len1 == base2);
 
-        pending_[i].len = len1 + len2;
+    pending_[i].len = len1 + len2;
 
-        if (i == stackSize - 3) {
-            pending_[i + 1] = pending_[i + 2];
-        }
+    if (i == stackSize - 3) {
+      pending_[i + 1] = pending_[i + 2];
+    }
 
-        pending_.pop_back();
+    pending_.pop_back();
 
-        diff_t const k = gallopRight(*base2, base1, len1, 0, compare);
-        GFX_TIMSORT_ASSERT(k >= 0);
+    diff_t const k = gallopRight(*base2, base1, len1, 0, compare);
+    GFX_TIMSORT_ASSERT(k >= 0);
 
-        base1 += k;
-        len1 -= k;
+    base1 += k;
+    len1 -= k;
 
-        if (len1 == 0) {
-            return;
-        }
+    if (len1 == 0) {
+      return;
+    }
 
-        len2 = gallopLeft(*(base1 + (len1 - 1)), base2, len2, len2 - 1, compare);
-        GFX_TIMSORT_ASSERT(len2 >= 0);
-        if (len2 == 0) {
-            return;
-        }
+    len2 = gallopLeft(*(base1 + (len1 - 1)), base2, len2, len2 - 1, compare);
+    GFX_TIMSORT_ASSERT(len2 >= 0);
+    if (len2 == 0) {
+      return;
+    }
 
-        if (len1 <= len2) {
-            mergeLo(base1, len1, base2, len2, compare);
-        } else {
-            mergeHi(base1, len1, base2, len2, compare);
+    if (len1 <= len2) {
+      mergeLo(base1, len1, base2, len2, compare);
+    } else {
+      mergeHi(base1, len1, base2, len2, compare);
+    }
+  }
+
+  template <typename Iter>
+  diff_t gallopLeft(ref_t key, Iter const base, diff_t const len, diff_t const hint,
+                    Compare compare) {
+    GFX_TIMSORT_ASSERT(len > 0);
+    GFX_TIMSORT_ASSERT(hint >= 0);
+    GFX_TIMSORT_ASSERT(hint < len);
+
+    diff_t lastOfs = 0;
+    diff_t ofs = 1;
+
+    if (compare(*(base + hint), key)) {
+      diff_t const maxOfs = len - hint;
+      while (ofs < maxOfs && compare(*(base + (hint + ofs)), key)) {
+        lastOfs = ofs;
+        ofs = (ofs << 1) + 1;
+
+        if (ofs <= 0) {  // int overflow
+          ofs = maxOfs;
+        }
+      }
+      if (ofs > maxOfs) {
+        ofs = maxOfs;
+      }
+
+      lastOfs += hint;
+      ofs += hint;
+    } else {
+      diff_t const maxOfs = hint + 1;
+      while (ofs < maxOfs && !compare(*(base + (hint - ofs)), key)) {
+        lastOfs = ofs;
+        ofs = (ofs << 1) + 1;
+
+        if (ofs <= 0) {
+          ofs = maxOfs;
         }
+      }
+      if (ofs > maxOfs) {
+        ofs = maxOfs;
+      }
+
+      diff_t const tmp = lastOfs;
+      lastOfs = hint - ofs;
+      ofs = hint - tmp;
     }
-
-    template <typename Iter>
-    diff_t gallopLeft(ref_t key, Iter const base, diff_t const len, diff_t const hint, Compare compare) {
-        GFX_TIMSORT_ASSERT(len > 0);
-        GFX_TIMSORT_ASSERT(hint >= 0);
-        GFX_TIMSORT_ASSERT(hint < len);
-
-        diff_t lastOfs = 0;
-        diff_t ofs = 1;
-
-        if (compare(*(base + hint), key)) {
-            diff_t const maxOfs = len - hint;
-            while (ofs < maxOfs && compare(*(base + (hint + ofs)), key)) {
-                lastOfs = ofs;
-                ofs = (ofs << 1) + 1;
-
-                if (ofs <= 0) { // int overflow
-                    ofs = maxOfs;
-                }
-            }
-            if (ofs > maxOfs) {
-                ofs = maxOfs;
-            }
-
-            lastOfs += hint;
-            ofs += hint;
-        } else {
-            diff_t const maxOfs = hint + 1;
-            while (ofs < maxOfs && !compare(*(base + (hint - ofs)), key)) {
-                lastOfs = ofs;
-                ofs = (ofs << 1) + 1;
-
-                if (ofs <= 0) {
-                    ofs = maxOfs;
-                }
-            }
-            if (ofs > maxOfs) {
-                ofs = maxOfs;
-            }
-
-            diff_t const tmp = lastOfs;
-            lastOfs = hint - ofs;
-            ofs = hint - tmp;
+    GFX_TIMSORT_ASSERT(-1 <= lastOfs);
+    GFX_TIMSORT_ASSERT(lastOfs < ofs);
+    GFX_TIMSORT_ASSERT(ofs <= len);
+
+    return std::lower_bound(base + (lastOfs + 1), base + ofs, key, compare) - base;
+  }
+
+  template <typename Iter>
+  diff_t gallopRight(ref_t key, Iter const base, diff_t const len, diff_t const hint,
+                     Compare compare) {
+    GFX_TIMSORT_ASSERT(len > 0);
+    GFX_TIMSORT_ASSERT(hint >= 0);
+    GFX_TIMSORT_ASSERT(hint < len);
+
+    diff_t ofs = 1;
+    diff_t lastOfs = 0;
+
+    if (compare(key, *(base + hint))) {
+      diff_t const maxOfs = hint + 1;
+      while (ofs < maxOfs && compare(key, *(base + (hint - ofs)))) {
+        lastOfs = ofs;
+        ofs = (ofs << 1) + 1;
+
+        if (ofs <= 0) {
+          ofs = maxOfs;
+        }
+      }
+      if (ofs > maxOfs) {
+        ofs = maxOfs;
+      }
+
+      diff_t const tmp = lastOfs;
+      lastOfs = hint - ofs;
+      ofs = hint - tmp;
+    } else {
+      diff_t const maxOfs = len - hint;
+      while (ofs < maxOfs && !compare(key, *(base + (hint + ofs)))) {
+        lastOfs = ofs;
+        ofs = (ofs << 1) + 1;
+
+        if (ofs <= 0) {  // int overflow
+          ofs = maxOfs;
         }
-        GFX_TIMSORT_ASSERT(-1 <= lastOfs);
-        GFX_TIMSORT_ASSERT(lastOfs < ofs);
-        GFX_TIMSORT_ASSERT(ofs <= len);
+      }
+      if (ofs > maxOfs) {
+        ofs = maxOfs;
+      }
 
-        return std::lower_bound(base + (lastOfs + 1), base + ofs, key, compare) - base;
+      lastOfs += hint;
+      ofs += hint;
+    }
+    GFX_TIMSORT_ASSERT(-1 <= lastOfs);
+    GFX_TIMSORT_ASSERT(lastOfs < ofs);
+    GFX_TIMSORT_ASSERT(ofs <= len);
+
+    return std::upper_bound(base + (lastOfs + 1), base + ofs, key, compare) - base;
+  }
+
+  static void rotateLeft(iter_t first, iter_t last) {
+    value_t tmp = std::move(*first);
+    iter_t last_1 = std::move(first + 1, last, first);
+    *last_1 = std::move(tmp);
+  }
+
+  static void rotateRight(iter_t first, iter_t last) {
+    iter_t last_1 = last - 1;
+    value_t tmp = std::move(*last_1);
+    std::move_backward(first, last_1, last);
+    *first = std::move(tmp);
+  }
+
+  void mergeLo(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2,
+               Compare compare) {
+    GFX_TIMSORT_ASSERT(len1 > 0);
+    GFX_TIMSORT_ASSERT(len2 > 0);
+    GFX_TIMSORT_ASSERT(base1 + len1 == base2);
+
+    if (len1 == 1) {
+      return rotateLeft(base1, base2 + len2);
+    }
+    if (len2 == 1) {
+      return rotateRight(base1, base2 + len2);
     }
 
-    template <typename Iter>
-    diff_t gallopRight(ref_t key, Iter const base, diff_t const len, diff_t const hint, Compare compare) {
-        GFX_TIMSORT_ASSERT(len > 0);
-        GFX_TIMSORT_ASSERT(hint >= 0);
-        GFX_TIMSORT_ASSERT(hint < len);
-
-        diff_t ofs = 1;
-        diff_t lastOfs = 0;
-
-        if (compare(key, *(base + hint))) {
-            diff_t const maxOfs = hint + 1;
-            while (ofs < maxOfs && compare(key, *(base + (hint - ofs)))) {
-                lastOfs = ofs;
-                ofs = (ofs << 1) + 1;
-
-                if (ofs <= 0) {
-                    ofs = maxOfs;
-                }
-            }
-            if (ofs > maxOfs) {
-                ofs = maxOfs;
-            }
-
-            diff_t const tmp = lastOfs;
-            lastOfs = hint - ofs;
-            ofs = hint - tmp;
-        } else {
-            diff_t const maxOfs = len - hint;
-            while (ofs < maxOfs && !compare(key, *(base + (hint + ofs)))) {
-                lastOfs = ofs;
-                ofs = (ofs << 1) + 1;
-
-                if (ofs <= 0) { // int overflow
-                    ofs = maxOfs;
-                }
-            }
-            if (ofs > maxOfs) {
-                ofs = maxOfs;
-            }
-
-            lastOfs += hint;
-            ofs += hint;
-        }
-        GFX_TIMSORT_ASSERT(-1 <= lastOfs);
-        GFX_TIMSORT_ASSERT(lastOfs < ofs);
-        GFX_TIMSORT_ASSERT(ofs <= len);
+    copy_to_tmp(base1, len1);
 
-        return std::upper_bound(base + (lastOfs + 1), base + ofs, key, compare) - base;
-    }
+    tmp_iter_t cursor1 = tmp_.begin();
+    iter_t cursor2 = base2;
+    iter_t dest = base1;
 
-    static void rotateLeft(iter_t first, iter_t last)
-    {
-        value_t tmp = std::move(*first);
-        iter_t last_1 = std::move(first + 1, last, first);
-        *last_1 = std::move(tmp);
-    }
+    *dest = std::move(*cursor2);
+    ++cursor2;
+    ++dest;
+    --len2;
 
-    static void rotateRight(iter_t first, iter_t last)
-    {
-        iter_t last_1 = last - 1;
-        value_t tmp = std::move(*last_1);
-        std::move_backward(first, last_1, last);
-        *first = std::move(tmp);
-    }
+    int minGallop(minGallop_);
 
+    // outer:
+    while (true) {
+      diff_t count1 = 0;
+      diff_t count2 = 0;
 
-    void mergeLo(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2, Compare compare) {
-        GFX_TIMSORT_ASSERT(len1 > 0);
+      do {
+        GFX_TIMSORT_ASSERT(len1 > 1);
         GFX_TIMSORT_ASSERT(len2 > 0);
-        GFX_TIMSORT_ASSERT(base1 + len1 == base2);
 
-        if (len1 == 1) {
-            return rotateLeft(base1, base2 + len2);
-        }
-        if (len2 == 1) {
-            return rotateRight(base1, base2 + len2);
+        if (compare(*cursor2, *cursor1)) {
+          *dest = std::move(*cursor2);
+          ++cursor2;
+          ++dest;
+          ++count2;
+          count1 = 0;
+          if (--len2 == 0) {
+            goto epilogue;
+          }
+        } else {
+          *dest = std::move(*cursor1);
+          ++cursor1;
+          ++dest;
+          ++count1;
+          count2 = 0;
+          if (--len1 == 1) {
+            goto epilogue;
+          }
         }
+      } while ((count1 | count2) < minGallop);
 
-        copy_to_tmp(base1, len1);
+      do {
+        GFX_TIMSORT_ASSERT(len1 > 1);
+        GFX_TIMSORT_ASSERT(len2 > 0);
 
-        tmp_iter_t cursor1 = tmp_.begin();
-        iter_t cursor2 = base2;
-        iter_t dest = base1;
+        count1 = gallopRight(*cursor2, cursor1, len1, 0, compare);
+        if (count1 != 0) {
+          std::move_backward(cursor1, cursor1 + count1, dest + count1);
+          dest += count1;
+          cursor1 += count1;
+          len1 -= count1;
 
+          if (len1 <= 1) {
+            goto epilogue;
+          }
+        }
         *dest = std::move(*cursor2);
         ++cursor2;
         ++dest;
-        --len2;
-
-        int minGallop(minGallop_);
-
-        // outer:
-        while (true) {
-            diff_t count1 = 0;
-            diff_t count2 = 0;
-
-            do {
-                GFX_TIMSORT_ASSERT(len1 > 1);
-                GFX_TIMSORT_ASSERT(len2 > 0);
-
-                if (compare(*cursor2, *cursor1)) {
-                    *dest = std::move(*cursor2);
-                    ++cursor2;
-                    ++dest;
-                    ++count2;
-                    count1 = 0;
-                    if (--len2 == 0) {
-                        goto epilogue;
-                    }
-                } else {
-                    *dest = std::move(*cursor1);
-                    ++cursor1;
-                    ++dest;
-                    ++count1;
-                    count2 = 0;
-                    if (--len1 == 1) {
-                        goto epilogue;
-                    }
-                }
-            } while ((count1 | count2) < minGallop);
-
-            do {
-                GFX_TIMSORT_ASSERT(len1 > 1);
-                GFX_TIMSORT_ASSERT(len2 > 0);
-
-                count1 = gallopRight(*cursor2, cursor1, len1, 0, compare);
-                if (count1 != 0) {
-                    std::move_backward(cursor1, cursor1 + count1, dest + count1);
-                    dest += count1;
-                    cursor1 += count1;
-                    len1 -= count1;
-
-                    if (len1 <= 1) {
-                        goto epilogue;
-                    }
-                }
-                *dest = std::move(*cursor2);
-                ++cursor2;
-                ++dest;
-                if (--len2 == 0) {
-                    goto epilogue;
-                }
-
-                count2 = gallopLeft(*cursor1, cursor2, len2, 0, compare);
-                if (count2 != 0) {
-                    std::move(cursor2, cursor2 + count2, dest);
-                    dest += count2;
-                    cursor2 += count2;
-                    len2 -= count2;
-                    if (len2 == 0) {
-                        goto epilogue;
-                    }
-                }
-                *dest = std::move(*cursor1);
-                ++cursor1;
-                ++dest;
-                if (--len1 == 1) {
-                    goto epilogue;
-                }
-
-                --minGallop;
-            } while ((count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP));
-
-            if (minGallop < 0) {
-                minGallop = 0;
-            }
-            minGallop += 2;
-        } // end of "outer" loop
-
-        epilogue: // merge what is left from either cursor1 or cursor2
-
-        minGallop_ = (std::min)(minGallop, 1);
-
-        if (len1 == 1) {
-            GFX_TIMSORT_ASSERT(len2 > 0);
-            std::move(cursor2, cursor2 + len2, dest);
-            *(dest + len2) = std::move(*cursor1);
-        } else {
-            GFX_TIMSORT_ASSERT(len1 != 0 && "Comparison function violates its general contract");
-            GFX_TIMSORT_ASSERT(len2 == 0);
-            GFX_TIMSORT_ASSERT(len1 > 1);
-            std::move(cursor1, cursor1 + len1, dest);
+        if (--len2 == 0) {
+          goto epilogue;
+        }
+
+        count2 = gallopLeft(*cursor1, cursor2, len2, 0, compare);
+        if (count2 != 0) {
+          std::move(cursor2, cursor2 + count2, dest);
+          dest += count2;
+          cursor2 += count2;
+          len2 -= count2;
+          if (len2 == 0) {
+            goto epilogue;
+          }
+        }
+        *dest = std::move(*cursor1);
+        ++cursor1;
+        ++dest;
+        if (--len1 == 1) {
+          goto epilogue;
         }
+
+        --minGallop;
+      } while ((count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP));
+
+      if (minGallop < 0) {
+        minGallop = 0;
+      }
+      minGallop += 2;
+    }  // end of "outer" loop
+
+  epilogue:  // merge what is left from either cursor1 or cursor2
+
+    minGallop_ = (std::min)(minGallop, 1);
+
+    if (len1 == 1) {
+      GFX_TIMSORT_ASSERT(len2 > 0);
+      std::move(cursor2, cursor2 + len2, dest);
+      *(dest + len2) = std::move(*cursor1);
+    } else {
+      GFX_TIMSORT_ASSERT(len1 != 0 &&
+                         "Comparison function violates its general contract");
+      GFX_TIMSORT_ASSERT(len2 == 0);
+      GFX_TIMSORT_ASSERT(len1 > 1);
+      std::move(cursor1, cursor1 + len1, dest);
+    }
+  }
+
+  void mergeHi(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2,
+               Compare compare) {
+    GFX_TIMSORT_ASSERT(len1 > 0);
+    GFX_TIMSORT_ASSERT(len2 > 0);
+    GFX_TIMSORT_ASSERT(base1 + len1 == base2);
+
+    if (len1 == 1) {
+      return rotateLeft(base1, base2 + len2);
+    }
+    if (len2 == 1) {
+      return rotateRight(base1, base2 + len2);
     }
 
-    void mergeHi(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2, Compare compare) {
+    copy_to_tmp(base2, len2);
+
+    iter_t cursor1 = base1 + len1;
+    tmp_iter_t cursor2 = tmp_.begin() + (len2 - 1);
+    iter_t dest = base2 + (len2 - 1);
+
+    *dest = std::move(*(--cursor1));
+    --dest;
+    --len1;
+
+    int minGallop(minGallop_);
+
+    // outer:
+    while (true) {
+      diff_t count1 = 0;
+      diff_t count2 = 0;
+
+      // The next loop is a hot path of the algorithm, so we decrement
+      // eagerly the cursor so that it always points directly to the value
+      // to compare, but we have to implement some trickier logic to make
+      // sure that it points to the next value again by the end of said loop
+      --cursor1;
+
+      do {
         GFX_TIMSORT_ASSERT(len1 > 0);
-        GFX_TIMSORT_ASSERT(len2 > 0);
-        GFX_TIMSORT_ASSERT(base1 + len1 == base2);
+        GFX_TIMSORT_ASSERT(len2 > 1);
+
+        if (compare(*cursor2, *cursor1)) {
+          *dest = std::move(*cursor1);
+          --dest;
+          ++count1;
+          count2 = 0;
+          if (--len1 == 0) {
+            goto epilogue;
+          }
+          --cursor1;
+        } else {
+          *dest = std::move(*cursor2);
+          --cursor2;
+          --dest;
+          ++count2;
+          count1 = 0;
+          if (--len2 == 1) {
+            ++cursor1;  // See comment before the loop
+            goto epilogue;
+          }
+        }
+      } while ((count1 | count2) < minGallop);
+      ++cursor1;  // See comment before the loop
 
-        if (len1 == 1) {
-            return rotateLeft(base1, base2 + len2);
+      do {
+        GFX_TIMSORT_ASSERT(len1 > 0);
+        GFX_TIMSORT_ASSERT(len2 > 1);
+
+        count1 = len1 - gallopRight(*cursor2, base1, len1, len1 - 1, compare);
+        if (count1 != 0) {
+          dest -= count1;
+          cursor1 -= count1;
+          len1 -= count1;
+          std::move_backward(cursor1, cursor1 + count1, dest + (1 + count1));
+
+          if (len1 == 0) {
+            goto epilogue;
+          }
         }
-        if (len2 == 1) {
-            return rotateRight(base1, base2 + len2);
+        *dest = std::move(*cursor2);
+        --cursor2;
+        --dest;
+        if (--len2 == 1) {
+          goto epilogue;
         }
 
-        copy_to_tmp(base2, len2);
-
-        iter_t cursor1 = base1 + len1;
-        tmp_iter_t cursor2 = tmp_.begin() + (len2 - 1);
-        iter_t dest = base2 + (len2 - 1);
-
+        count2 = len2 - gallopLeft(*(cursor1 - 1), tmp_.begin(), len2, len2 - 1, compare);
+        if (count2 != 0) {
+          dest -= count2;
+          cursor2 -= count2;
+          len2 -= count2;
+          std::move(cursor2 + 1, cursor2 + (1 + count2), dest + 1);
+          if (len2 <= 1) {
+            goto epilogue;
+          }
+        }
         *dest = std::move(*(--cursor1));
         --dest;
-        --len1;
-
-        int minGallop(minGallop_);
-
-        // outer:
-        while (true) {
-            diff_t count1 = 0;
-            diff_t count2 = 0;
-
-            // The next loop is a hot path of the algorithm, so we decrement
-            // eagerly the cursor so that it always points directly to the value
-            // to compare, but we have to implement some trickier logic to make
-            // sure that it points to the next value again by the end of said loop
-            --cursor1;
-
-            do {
-                GFX_TIMSORT_ASSERT(len1 > 0);
-                GFX_TIMSORT_ASSERT(len2 > 1);
-
-                if (compare(*cursor2, *cursor1)) {
-                    *dest = std::move(*cursor1);
-                    --dest;
-                    ++count1;
-                    count2 = 0;
-                    if (--len1 == 0) {
-                        goto epilogue;
-                    }
-                    --cursor1;
-                } else {
-                    *dest = std::move(*cursor2);
-                    --cursor2;
-                    --dest;
-                    ++count2;
-                    count1 = 0;
-                    if (--len2 == 1) {
-                        ++cursor1; // See comment before the loop
-                        goto epilogue;
-                    }
-                }
-            } while ((count1 | count2) < minGallop);
-            ++cursor1; // See comment before the loop
-
-            do {
-                GFX_TIMSORT_ASSERT(len1 > 0);
-                GFX_TIMSORT_ASSERT(len2 > 1);
-
-                count1 = len1 - gallopRight(*cursor2, base1, len1, len1 - 1, compare);
-                if (count1 != 0) {
-                    dest -= count1;
-                    cursor1 -= count1;
-                    len1 -= count1;
-                    std::move_backward(cursor1, cursor1 + count1, dest + (1 + count1));
-
-                    if (len1 == 0) {
-                        goto epilogue;
-                    }
-                }
-                *dest = std::move(*cursor2);
-                --cursor2;
-                --dest;
-                if (--len2 == 1) {
-                    goto epilogue;
-                }
-
-                count2 = len2 - gallopLeft(*(cursor1 - 1), tmp_.begin(), len2, len2 - 1, compare);
-                if (count2 != 0) {
-                    dest -= count2;
-                    cursor2 -= count2;
-                    len2 -= count2;
-                    std::move(cursor2 + 1, cursor2 + (1 + count2), dest + 1);
-                    if (len2 <= 1) {
-                        goto epilogue;
-                    }
-                }
-                *dest = std::move(*(--cursor1));
-                --dest;
-                if (--len1 == 0) {
-                    goto epilogue;
-                }
-
-                --minGallop;
-            } while ((count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP));
-
-            if (minGallop < 0) {
-                minGallop = 0;
-            }
-            minGallop += 2;
-        } // end of "outer" loop
-
-        epilogue: // merge what is left from either cursor1 or cursor2
-
-        minGallop_ = (std::min)(minGallop, 1);
-
-        if (len2 == 1) {
-            GFX_TIMSORT_ASSERT(len1 > 0);
-            dest -= len1;
-            std::move_backward(cursor1 - len1, cursor1, dest + (1 + len1));
-            *dest = std::move(*cursor2);
-        } else {
-            GFX_TIMSORT_ASSERT(len2 != 0 && "Comparison function violates its general contract");
-            GFX_TIMSORT_ASSERT(len1 == 0);
-            GFX_TIMSORT_ASSERT(len2 > 1);
-            std::move(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1));
+        if (--len1 == 0) {
+          goto epilogue;
         }
-    }
 
-    void copy_to_tmp(iter_t const begin, diff_t len) {
-        tmp_.assign(std::make_move_iterator(begin),
-                    std::make_move_iterator(begin + len));
+        --minGallop;
+      } while ((count1 >= MIN_GALLOP) | (count2 >= MIN_GALLOP));
+
+      if (minGallop < 0) {
+        minGallop = 0;
+      }
+      minGallop += 2;
+    }  // end of "outer" loop
+
+  epilogue:  // merge what is left from either cursor1 or cursor2
+
+    minGallop_ = (std::min)(minGallop, 1);
+
+    if (len2 == 1) {
+      GFX_TIMSORT_ASSERT(len1 > 0);
+      dest -= len1;
+      std::move_backward(cursor1 - len1, cursor1, dest + (1 + len1));
+      *dest = std::move(*cursor2);
+    } else {
+      GFX_TIMSORT_ASSERT(len2 != 0 &&
+                         "Comparison function violates its general contract");
+      GFX_TIMSORT_ASSERT(len1 == 0);
+      GFX_TIMSORT_ASSERT(len2 > 1);
+      std::move(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1));
     }
+  }
 
-public:
+  void copy_to_tmp(iter_t const begin, diff_t len) {
+    tmp_.assign(std::make_move_iterator(begin), std::make_move_iterator(begin + len));
+  }
 
-    static void sort(iter_t const lo, iter_t const hi, Compare compare) {
-        GFX_TIMSORT_ASSERT(lo <= hi);
+ public:
+  static void sort(iter_t const lo, iter_t const hi, Compare compare) {
+    GFX_TIMSORT_ASSERT(lo <= hi);
 
-        diff_t nRemaining = (hi - lo);
-        if (nRemaining < 2) {
-            return; // nothing to do
-        }
+    diff_t nRemaining = (hi - lo);
+    if (nRemaining < 2) {
+      return;  // nothing to do
+    }
 
-        if (nRemaining < MIN_MERGE) {
-            diff_t const initRunLen = countRunAndMakeAscending(lo, hi, compare);
-            GFX_TIMSORT_LOG("initRunLen: " << initRunLen);
-            binarySort(lo, hi, lo + initRunLen, compare);
-            return;
-        }
+    if (nRemaining < MIN_MERGE) {
+      diff_t const initRunLen = countRunAndMakeAscending(lo, hi, compare);
+      GFX_TIMSORT_LOG("initRunLen: " << initRunLen);
+      binarySort(lo, hi, lo + initRunLen, compare);
+      return;
+    }
 
-        TimSort ts;
-        diff_t const minRun = minRunLength(nRemaining);
-        iter_t cur = lo;
-        do {
-            diff_t runLen = countRunAndMakeAscending(cur, hi, compare);
+    TimSort ts;
+    diff_t const minRun = minRunLength(nRemaining);
+    iter_t cur = lo;
+    do {
+      diff_t runLen = countRunAndMakeAscending(cur, hi, compare);
 
-            if (runLen < minRun) {
-                diff_t const force = (std::min)(nRemaining, minRun);
-                binarySort(cur, cur + force, cur + runLen, compare);
-                runLen = force;
-            }
+      if (runLen < minRun) {
+        diff_t const force = (std::min)(nRemaining, minRun);
+        binarySort(cur, cur + force, cur + runLen, compare);
+        runLen = force;
+      }
 
-            ts.pushRun(cur, runLen);
-            ts.mergeCollapse(compare);
+      ts.pushRun(cur, runLen);
+      ts.mergeCollapse(compare);
 
-            cur += runLen;
-            nRemaining -= runLen;
-        } while (nRemaining != 0);
+      cur += runLen;
+      nRemaining -= runLen;
+    } while (nRemaining != 0);
 
-        GFX_TIMSORT_ASSERT(cur == hi);
-        ts.mergeForceCollapse(compare);
-        GFX_TIMSORT_ASSERT(ts.pending_.size() == 1);
+    GFX_TIMSORT_ASSERT(cur == hi);
+    ts.mergeForceCollapse(compare);
+    GFX_TIMSORT_ASSERT(ts.pending_.size() == 1);
 
-        GFX_TIMSORT_LOG("size: " << (hi - lo) << " tmp_.size(): " << ts.tmp_.size()
-                                 << " pending_.size(): " << ts.pending_.size());
-    }
+    GFX_TIMSORT_LOG("size: " << (hi - lo) << " tmp_.size(): " << ts.tmp_.size()
+                             << " pending_.size(): " << ts.pending_.size());
+  }
 };
 
-} // namespace detail
-
+}  // namespace detail
 
 // ---------------------------------------
 // Public interface implementation
@@ -687,17 +684,18 @@ template <typename RandomAccessIterator, typename Compare> class TimSort {
 template <typename RandomAccessIterator, typename Compare, typename Projection>
 void timsort(RandomAccessIterator const first, RandomAccessIterator const last,
              Compare compare, Projection projection) {
-    typedef detail::projection_compare<Compare, Projection> compare_t;
-    compare_t comp(std::move(compare), std::move(projection));
-    detail::TimSort<RandomAccessIterator, compare_t>::sort(first, last, std::move(comp));
+  typedef detail::projection_compare<Compare, Projection> compare_t;
+  compare_t comp(std::move(compare), std::move(projection));
+  detail::TimSort<RandomAccessIterator, compare_t>::sort(first, last, std::move(comp));
 }
 
 /**
  * Same as std::stable_sort(first, last, compare).
  */
 template <typename RandomAccessIterator, typename Compare>
-void timsort(RandomAccessIterator const first, RandomAccessIterator const last, Compare compare) {
-    gfx::timsort(first, last, compare, detail::identity());
+void timsort(RandomAccessIterator const first, RandomAccessIterator const last,
+             Compare compare) {
+  gfx::timsort(first, last, compare, detail::identity());
 }
 
 /**
@@ -705,39 +703,39 @@ void timsort(RandomAccessIterator const first, RandomAccessIterator const last,
  */
 template <typename RandomAccessIterator>
 void timsort(RandomAccessIterator const first, RandomAccessIterator const last) {
-    typedef typename std::iterator_traits<RandomAccessIterator>::value_type value_type;
-    gfx::timsort(first, last, std::less<value_type>(), detail::identity());
+  typedef typename std::iterator_traits<RandomAccessIterator>::value_type value_type;
+  gfx::timsort(first, last, std::less<value_type>(), detail::identity());
 }
 
 /**
  * Stably sorts a range with a comparison function and a projection function.
  */
 template <typename RandomAccessRange, typename Compare, typename Projection>
-void timsort(RandomAccessRange &range, Compare compare, Projection projection) {
-    gfx::timsort(std::begin(range), std::end(range), compare, projection);
+void timsort(RandomAccessRange& range, Compare compare, Projection projection) {
+  gfx::timsort(std::begin(range), std::end(range), compare, projection);
 }
 
 /**
  * Same as std::stable_sort(std::begin(range), std::end(range), compare).
  */
 template <typename RandomAccessRange, typename Compare>
-void timsort(RandomAccessRange &range, Compare compare) {
-    gfx::timsort(std::begin(range), std::end(range), compare);
+void timsort(RandomAccessRange& range, Compare compare) {
+  gfx::timsort(std::begin(range), std::end(range), compare);
 }
 
 /**
  * Same as std::stable_sort(std::begin(range), std::end(range)).
  */
 template <typename RandomAccessRange>
-void timsort(RandomAccessRange &range) {
-    gfx::timsort(std::begin(range), std::end(range));
+void timsort(RandomAccessRange& range) {
+  gfx::timsort(std::begin(range), std::end(range));
 }
 
-} // namespace gfx
+}  // namespace gfx
 
 #undef GFX_TIMSORT_ENABLE_ASSERT
 #undef GFX_TIMSORT_ASSERT
 #undef GFX_TIMSORT_ENABLE_LOG
 #undef GFX_TIMSORT_LOG
 
-#endif // GFX_TIMSORT_HPP
+#endif  // GFX_TIMSORT_HPP
diff --git a/native-sql-engine/tools/formatcppcode.sh b/native-sql-engine/tools/formatcppcode.sh
new file mode 100644
index 000000000..35cc75f39
--- /dev/null
+++ b/native-sql-engine/tools/formatcppcode.sh
@@ -0,0 +1 @@
+find . -regex '.*\.\(cc\|hpp\|cu\|c\|h\)' -exec clang-format -style=file -i {} \;