Skip to content

Commit

Permalink
Add benchmark for CSVParser with Atof and AtofPrecise.
Browse files Browse the repository at this point in the history
  • Loading branch information
cyfdecyf committed Mar 18, 2021
1 parent 67e2b11 commit 4f8639a
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 0 deletions.
18 changes: 18 additions & 0 deletions tests/benchmark/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Stand-alone build description for the CSVParser benchmark executable.
cmake_minimum_required(VERSION 3.0)

project(benchmark)

# When switched ON, USE_PRECISE_TEXT_PARSER is defined for the compiler.
# NOTE(review): the description calls the precise parser "faster", while the
# README next to this file reports Atof as the faster one on its data set.
option(USE_PRECISE_TEXT_PARSER "Use precise (and faster) double parser for text input file" OFF)

if(USE_PRECISE_TEXT_PARSER)
  add_definitions(-DUSE_PRECISE_TEXT_PARSER)
endif()

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -O3")

# Headers are taken from the include/ and src/ trees three levels up.
include_directories(
  ${CMAKE_SOURCE_DIR}/../../../include
  ${CMAKE_SOURCE_DIR}/../../../src
)
#link_directories(${CMAKE_SOURCE_DIR}/lib)

add_executable(parser parser.cpp)
38 changes: 38 additions & 0 deletions tests/benchmark/parser/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
This is a simple benchmark comparing performance of `Common::Atof`
and `Common::AtofPrecise` when used in `CSVParser`.

Just run `./run_parser_benchmark.sh` in this directory.

The test script generates a CSV file with 20000 rows and 2000 columns (about 840MB).

For this test, `Common::Atof` is much faster than `Common::AtofPrecise`.

Benchmark run output on Intel Xeon 2640 v3:

```
========== Benchmark run Atof parser ==========
real 0m2.027s
user 0m1.822s
sys 0m0.204s
real 0m2.186s
user 0m1.998s
sys 0m0.188s
real 0m2.202s
user 0m2.010s
sys 0m0.192s
========== Benchmark run AtofPrecise parser ==========
real 0m6.556s
user 0m6.324s
sys 0m0.232s
real 0m6.648s
user 0m6.496s
sys 0m0.152s
real 0m6.912s
user 0m6.748s
sys 0m0.164s
```
17 changes: 17 additions & 0 deletions tests/benchmark/parser/gen_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import sys

import numpy as np


def gen_csv(fname, nrow, ncol):
    """Write an ``nrow`` x ``ncol`` CSV file of random numbers to ``fname``.

    ``nrow`` and ``ncol`` are converted with ``int()``, so numeric strings
    (as delivered by a command line) are accepted.  Each cell is a
    ``np.random.random`` sample multiplied by 5, written with 19 digits
    after the decimal point and separated by commas.
    """
    n_rows, n_cols = int(nrow), int(ncol)

    samples = 5 * np.random.random(n_rows * n_cols)
    table = np.reshape(samples, (n_rows, n_cols))
    np.savetxt(fname, table, delimiter=',', fmt='%.19f')


if __name__ == '__main__':
    # argh is only needed for command-line use, so it is imported here
    # rather than at module level.
    from argh import dispatch_command

    dispatch_command(gen_csv)
49 changes: 49 additions & 0 deletions tests/benchmark/parser/parser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// This is a very simple benchmark for comparing performance of Atof and AtofPrecise.

#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include <io/parser.hpp>

namespace LightGBM {

// Reads the file at `fpath` line by line and feeds every line to a CSVParser.
//
// fpath: path of the CSV file to read.
// ncol:  forwarded to the CSVParser constructor as its second argument.
//
// The parsed values are discarded; the function exists only so that the
// parsing work can be timed from outside. If the file cannot be opened, a
// message is written to stderr and the process exits with status 1.
void ParseCSV(const std::string& fpath, int ncol) {
  // NOTE(review): -1 is presumably "no label column" -- confirm against CSVParser.
  CSVParser parser(-1, ncol);

  std::ifstream infile(fpath);
  if (!infile) {
    // End the message with a newline so it does not run into whatever the
    // shell prints next.
    std::cerr << "fail to open " << fpath << '\n';
    std::exit(1);
  }

  std::string line;
  double label = 0.0;
  std::vector<std::pair<int, double>> oneline_features;
  while (std::getline(infile, line)) {
    parser.ParseOneLine(line.c_str(), &oneline_features, &label);
    // printf("%f\n", oneline_features[0].second);
    // Reuse the same vector for the next line; clear() keeps its capacity.
    oneline_features.clear();
  }
}

}  // namespace LightGBM

namespace {

// Converts `text` to a column count.
//
// Returns true and stores the value in `*out` when `text` is a complete
// base-10 integer in the range [1, INT_MAX]. Returns false for empty or
// non-numeric input, trailing characters, overflow, or a value below 1;
// `*out` is left untouched in that case.
bool ParseColumnCount(const char* text, int* out) {
  char* end = nullptr;
  // strtol reports range errors through errno but never resets it, so a
  // stale value from an earlier call must be cleared first.
  errno = 0;
  const long value = std::strtol(text, &end, 10);
  if (errno != 0 || end == text || *end != '\0') {
    return false;
  }
  if (value < 1 || value > INT_MAX) {
    return false;
  }
  *out = static_cast<int>(value);
  return true;
}

}  // namespace

// Entry point: parser <fname> <ncol>
//
// Validates the command line, then hands the file name and column count to
// LightGBM::ParseCSV. Exits with status 1 on a usage or argument error.
int main(int argc, const char* argv[]) {
  if (argc != 3) {
    std::printf("usage: parser <fname> <ncol>\n");
    return 1;
  }

  const char* fpath = argv[1];
  int ncol = 0;
  if (!ParseColumnCount(argv[2], &ncol)) {
    std::fprintf(stderr, "fail to parse ncol\n");
    return 1;
  }

  LightGBM::ParseCSV(fpath, ncol);

  return 0;
}
43 changes: 43 additions & 0 deletions tests/benchmark/parser/run_parser_benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash

# Exit on the first failing command (bash `set -e` semantics).
set -e

# Dimensions handed to gen_csv.py; ncol is also passed to the parser binary.
nrow=20000
ncol=2000

# Configure (when needed) and compile the benchmark.
#   $1  build directory; must be a direct child of the directory holding
#       CMakeLists.txt because cmake is invoked with `..`
#   $2  extra arguments for cmake; may be empty
build() {
    local d=$1
    local flag=$2
    echo "building $d $flag"
    # Key the configure step on the generated Makefile, not on the directory:
    # a directory left behind by a failed cmake run would otherwise suppress
    # configuration on every later run and `make` would have nothing to build.
    # $flag is left unquoted on purpose so that an empty value expands to no
    # argument at all.
    [[ -f "$d/Makefile" ]] || (mkdir -p "$d" && cd "$d" && cmake $flag ..)
    pushd "$d"
    make
    popd
}

# Create test.csv with gen_csv.py unless the file is already present.
gen_data() {
    # Nothing to do when an earlier run already produced the file.
    [[ -f test.csv ]] && return 0

    echo "generating csv ..."
    python gen_csv.py test.csv $nrow $ncol
}

# Time three runs of each parser binary on test.csv, printing a banner before
# each group. Swap `time` for /usr/bin/time to use the external timer instead
# of the shell keyword.
parser_benchmark() {
    local run

    echo "========== Benchmark run Atof parser =========="
    for run in 1 2 3; do
        time ./build/parser test.csv $ncol
    done

    echo
    echo "========== Benchmark run AtofPrecise parser =========="
    for run in 1 2 3; do
        time ./build-precise/parser test.csv $ncol
    done
}

# Build once without extra cmake flags and once with the precise parser
# option switched on, each in its own build directory.
build build ""
build build-precise "-DUSE_PRECISE_TEXT_PARSER=on"
# Make sure the input file exists, then time both binaries on it.
gen_data
parser_benchmark

0 comments on commit 4f8639a

Please sign in to comment.