forked from microsoft/LightGBM
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add benchmark for CSVParser with Atof and AtofPrecise.
- Loading branch information
Showing
5 changed files
with
165 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Stand-alone build for the CSVParser text-parsing benchmark.
#
# Configure with -DUSE_PRECISE_TEXT_PARSER=ON to compile parser.cpp with the
# USE_PRECISE_TEXT_PARSER macro defined; the default (OFF) leaves it undefined.

# CMake 4.0 and later reject projects requesting compatibility below 3.5.
cmake_minimum_required(VERSION 3.5)

project(benchmark LANGUAGES CXX)

option(USE_PRECISE_TEXT_PARSER "Use precise (but slower) double parser for text input file" OFF)

# CMAKE_CXX_STANDARD already produces the -std flag; do not repeat it by hand.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(parser parser.cpp)

# parser.cpp includes <io/parser.hpp>, resolved through the include/ and src/
# trees three levels above this directory.
target_include_directories(parser PRIVATE
  "${CMAKE_CURRENT_SOURCE_DIR}/../../../include"
  "${CMAKE_CURRENT_SOURCE_DIR}/../../../src"
)

if(USE_PRECISE_TEXT_PARSER)
  target_compile_definitions(parser PRIVATE USE_PRECISE_TEXT_PARSER)
endif()

# -O3 is a GCC/Clang-style flag and is not an MSVC option.
if(NOT MSVC)
  target_compile_options(parser PRIVATE -O3)
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
This is a simple benchmark comparing the performance of `Common::Atof`
and `Common::AtofPrecise` when used in `CSVParser`.

Just run `./run_parser_benchmark.sh` in this directory.

The test script generates a CSV file with 20000 rows and 2000 columns (about 840MB).

For this test, `Common::Atof` is much faster than `Common::AtofPrecise` (roughly 3x in the run below).

Benchmark run output on Intel Xeon 2640 v3:
|
||
```
========== Benchmark run Atof parser ==========
real 0m2.027s
user 0m1.822s
sys 0m0.204s
real 0m2.186s
user 0m1.998s
sys 0m0.188s
real 0m2.202s
user 0m2.010s
sys 0m0.192s
========== Benchmark run AtofPrecise parser ==========
real 0m6.556s
user 0m6.324s
sys 0m0.232s
real 0m6.648s
user 0m6.496s
sys 0m0.152s
real 0m6.912s
user 0m6.748s
sys 0m0.164s
```
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import sys | ||
|
||
import numpy as np | ||
|
||
|
||
def gen_csv(fname, nrow, ncol):
    """Write an ``nrow`` x ``ncol`` CSV of random values to ``fname``.

    ``nrow`` and ``ncol`` are passed through ``int()``, so numeric strings
    (as received from the command line) are accepted. Each value is a
    ``np.random.random`` sample scaled by 5 and written with 19 decimal
    places, using ``,`` as the delimiter.
    """
    n_rows, n_cols = int(nrow), int(ncol)
    table = 5 * np.random.random((n_rows, n_cols))
    np.savetxt(fname, table, delimiter=',', fmt='%.19f')
|
||
|
||
if __name__ == '__main__':
    # Hand gen_csv to argh, which exposes it as this script's command line.
    from argh import dispatch_command
    dispatch_command(gen_csv)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
// This is a very simple benchmark for comparing performance of Atof and AtofPrecise. | ||
|
||
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
|
||
#include <io/parser.hpp> | ||
|
||
namespace LightGBM { | ||
|
||
void ParseCSV(const std::string& fpath, int ncol) { | ||
CSVParser parser(-1, ncol); | ||
|
||
std::ifstream infile(fpath); | ||
if (! infile) { | ||
std::cerr << "fail to open " << fpath; | ||
std::exit(1); | ||
} | ||
|
||
std::string line; | ||
double label; | ||
std::vector<std::pair<int, double>> oneline_features; | ||
while (getline(infile, line)) { | ||
parser.ParseOneLine(line.c_str(), &oneline_features, &label); | ||
// printf("%f\n", oneline_features[0].second); | ||
oneline_features.clear(); | ||
} | ||
} | ||
|
||
} // namespace LightGBM | ||
|
||
// Parses `text` as a base-10, non-negative column count that fits in an int.
// Returns true and stores the value in `*out` on success; returns false when
// `text` is empty, has trailing non-digit characters, or is out of range.
static bool ParseColumnCount(const char* text, int* out) {
  // strtol only ever sets errno, so clear it to avoid reading a stale value.
  errno = 0;
  char* end = nullptr;
  const long value = std::strtol(text, &end, 10);
  // Non-numeric input does not set errno; it is detected by `end` not having
  // advanced, or by leftover characters after the digits.
  if (errno != 0 || end == text || *end != '\0') {
    return false;
  }
  if (value < 0 || value > INT_MAX) {
    return false;
  }
  *out = static_cast<int>(value);
  return true;
}

// Usage: parser <fname> <ncol>
// Parses the CSV file <fname> with LightGBM::ParseCSV, passing <ncol> as the
// column count. Exits with status 1 on a wrong argument count or an invalid
// <ncol>.
int main(int argc, const char* argv[]) {
  if (argc != 3) {
    std::printf("usage: parser <fname> <ncol>\n");
    std::exit(1);
  }

  const char* fpath = argv[1];
  int ncol = 0;
  if (!ParseColumnCount(argv[2], &ncol)) {
    std::fprintf(stderr, "fail to parse ncol\n");
    std::exit(1);
  }

  LightGBM::ParseCSV(fpath, ncol);

  return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/bin/bash
#
# Builds the parser benchmark with and without USE_PRECISE_TEXT_PARSER,
# generates the CSV input if it is missing, and times both binaries.

set -e

# All paths used below (build directories, gen_csv.py, test.csv) are relative,
# so run from the directory that contains this script regardless of the
# caller's working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Dimensions of the generated CSV input.
nrow=20000
ncol=2000
|
||
# Configure (when not yet configured) and compile the benchmark.
#   $1 - build directory; cmake is pointed at its parent directory ("..")
#   $2 - optional extra argument passed to cmake at configure time
build() {
    local dir=$1
    local flag=${2:-}
    echo "building $dir $flag"
    # Key on the generated Makefile rather than on the directory: a directory
    # left behind by a failed configure must not suppress the next cmake run.
    if [[ ! -f "$dir/Makefile" ]]; then
        mkdir -p "$dir"
        # ${flag:+...} drops the argument entirely when no flag was given.
        (cd "$dir" && cmake ${flag:+"$flag"} ..)
    fi
    make -C "$dir"
}
|
||
# Generate test.csv ($nrow rows by $ncol columns) unless it already exists.
# The interpreter defaults to `python` and can be overridden through the
# PYTHON environment variable.
gen_data() {
    if [[ ! -f test.csv ]]; then
        echo "generating csv ..."
        # Write to a scratch name and rename only on success, so an interrupted
        # run never leaves a truncated test.csv that later runs would reuse.
        local tmp=test.csv.partial
        "${PYTHON:-python}" gen_csv.py "$tmp" "$nrow" "$ncol"
        mv "$tmp" test.csv
    fi
}
|
||
# Time three runs of each parser binary on test.csv: first the default build
# (./build), then the USE_PRECISE_TEXT_PARSER build (./build-precise).
parser_benchmark() {
    local spec title dir run
    local need_gap=""
    for spec in "Atof:build" "AtofPrecise:build-precise"; do
        title=${spec%%:*}
        dir=${spec#*:}
        # Blank separator line between the two sections, not before the first.
        if [[ -n $need_gap ]]; then
            echo
        fi
        need_gap=yes
        echo "========== Benchmark run ${title} parser =========="
        for run in 1 2 3; do
            # `time` here is the bash keyword, not /usr/bin/time.
            time "./${dir}/parser" test.csv "$ncol"
        done
    done
}
|
||
# Entry point: compile both parser variants, make sure the CSV input exists,
# then run the timing comparison.
main() {
    build build ""
    build build-precise "-DUSE_PRECISE_TEXT_PARSER=on"
    gen_data
    parser_benchmark
}

main