Skip to content

Commit

Permalink
Merge pull request #2 from sundy-li/databend
Browse files Browse the repository at this point in the history
improve(databend): use latest databend version and correct some numbers
  • Loading branch information
alexey-milovidov authored Jul 13, 2022
2 parents 794becb + 3664141 commit 8181fdc
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 71 deletions.
42 changes: 15 additions & 27 deletions databend/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/bin/bash

mkdir databend && cd databend
curl -LJO 'https://github.com/datafuselabs/databend/releases/download/v0.7.113-nightly/databend-v0.7.113-nightly-x86_64-unknown-linux-musl.tar.gz'
tar xzvf 'databend-v0.7.113-nightly-x86_64-unknown-linux-musl.tar.gz'
curl -LJO 'https://github.com/datafuselabs/databend/releases/download/v0.7.127-nightly/databend-v0.7.127-nightly-x86_64-unknown-linux-musl.tar.gz'
tar xzvf 'databend-v0.7.127-nightly-x86_64-unknown-linux-musl.tar.gz'

echo 'dir = "metadata/_logs"
admin_api_address = "127.0.0.1:8101"
Expand Down Expand Up @@ -68,38 +68,26 @@ curl https://clickhouse.com/ | sh
sudo ./clickhouse install

# Load the data
# Docs: https://databend.rs/doc/learn/analyze-hits-dataset-with-databend

curl 'http://default@localhost:8124/' --data-binary @create.sql
curl 'http://default@localhost:8125/' --data-binary @create.sql

wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz'
gzip -d hits.csv.gz
wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
gzip -d hits.tsv.gz

# Note: if I run
# Note:
# clickhouse-client --time --query "INSERT INTO hits FORMAT TSV" < hits.tsv
# it panics:
# ERROR common_tracing::panic_hook: panicked at 'called `Result::unwrap()` on an `Err` value: SendError
# can work but it's a bit slower than streaming load

# Note: if I run
# curl -XPUT 'http://root:@127.0.0.1:8000/v1/streaming_load' -H 'insert_sql: insert into hits format CSV' -H 'skip_header: 0' -H 'field_delimiter: ,' -H 'record_delimiter: \n' -F 'upload=@"./hits.csv"'
# curl: (55) Send failure: Broken pipe
time curl -XPUT 'http://root:@127.0.0.1:8081/v1/streaming_load' -H 'insert_sql: insert into hits format TSV' -H 'skip_header: 0' -H 'field_delimiter: \t' -H 'record_delimiter: \n' -F 'upload=@"./hits.tsv"'

# This is not entirely correct, but starts to work:
# curl -XPUT 'http://root:@127.0.0.1:8000/v1/streaming_load' -H 'insert_sql: insert into hits format TSV' -H 'skip_header: 0' -H 'field_delimiter: \t' -H 'record_delimiter: \n' -F 'upload=@"./hits.tsv"'
# and fails after 7 minutes 38 seconds without loading any data:
# Code: 4000, displayText = invalid data (Expected to have terminated string literal.) (while in processor thread 5).
# the diagnostics are terrible.
# {"id":"b9e20026-7eb2-4f09-a6b3-3ab79c4cb1fd","state":"SUCCESS","stats":{"rows":99997497,"bytes":81443407174},"error":null}
# real 8m17.103s

head -n 90000000 hits.tsv > hits90m.tsv
time curl -XPUT 'http://root:@127.0.0.1:8000/v1/streaming_load' -H 'insert_sql: insert into hits format TSV' -H 'skip_header: 0' -H 'field_delimiter: \t' -H 'record_delimiter: \n' -F 'upload=@"./hits90m.tsv"'
wc -l hits.tsv
# 99997497 hits.tsv

# {"id":"08f59e6c-2924-483e-bb96-cbcb458588f5","state":"SUCCESS","stats":{"rows":90000000,"bytes":73152552024},"error":null}
# real 7m15.312s

du -bcs _data
# 38714978944

# It does not support ClickHouse protocol well (it hangs on some queries if they are too long).
du -bcs benddata
# 15376458490

./run.sh 2>&1 | tee log.txt

# Note: divide every number by 0.9 as only 90% of the data was loaded successfully.
88 changes: 44 additions & 44 deletions databend/results/c6a.4xlarge.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,56 +3,56 @@
"date": "2022-07-01",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "Only 90% of data successfully loaded.",
"comment": "",

"tags": ["Rust", "column-oriented", "ClickHouse derivative"],

"load_time": 484,
"data_size": 43016643271,
"load_time": 497,
"data_size": 15376458490,

"result": [
[0.010087, 0.002961, 0.003271],
[0.127964, 0.080012, 0.075741],
[0.162388, 0.143967, 0.144762],
[0.252904, 0.217471, 0.217369],
[34.281026, 34.844158, 34.526942],
[25.290307, 25.793068, 25.620563],
[0.112484, 0.093867, 0.090891],
[0.086604, 0.07796, 0.076448],
[20.723203, 20.7483, 20.354869],
[20.81994, 20.72446, 20.696573],
[1.964378, 1.93559, 1.893824],
[1.846866, 1.789111, 1.763664],
[4.468158, 4.407959, 4.438036],
[19.947276, 19.8859, 19.853514],
[5.478573, 5.474461, 5.460604],
[5.509521, 5.513413, 5.363123],
[15.430359, 15.5406, 15.461211],
[14.905998, 15.029721, 15.019642],
[31.069663, 30.811763, 30.737336],
[0.281067, 0.220021, 0.217741],
[8.89374, 4.12692, 4.131689],
[10.38448, 4.603694, 4.571757],
[19.980572, 8.836322, 8.892694],
[59.786474, 52.452881, 39.941988],
[2.804019, 0.994794, 0.958224],
[0.765299, 0.730434, 0.723964],
[2.784648, 0.94665, 0.936684],
[8.905027, 5.418438, 5.386109],
[12.187652, 12.230066, 12.164123],
[3.35748, 3.395991, 3.319434],
[4.309389, 3.854977, 3.772506],
[9.958201, 7.027432, 6.888253],
[50.200569, 50.535126, 50.283066],
[24.469412, 21.222713, 21.010188],
[26.115852, 23.93507, 24.835342],
[7.511517, 7.296179, 7.324549],
[2.156784, 1.298258, 1.278441],
[2.155447, 1.314499, 1.331237],
[2.007053, 1.181676, 1.155612],
[0.013902, 0.004422, 0.004122],
[0.101888, 0.104271, 0.073037],
[0.168212, 0.141071, 0.142650],
[0.243747, 0.203377, 0.204867],
[33.140331, 34.229988, 33.679454],
[26.319933, 27.272582, 25.767886],
[0.110273, 0.090036, 0.092146],
[0.084274, 0.076594, 0.074711],
[19.987245, 20.475889, 20.303076],
[20.374940, 20.468011, 20.483538],
[1.963584, 1.907614, 1.869584],
[1.894976, 1.876273, 1.879085],
[4.593503, 4.507381, 4.462135],
[19.678799, 19.799023, 19.694602],
[5.575033, 5.484197, 5.419428],
[6.095004, 6.054998, 6.095125],
[14.886180, 15.040093, 15.089891],
[14.785431, 14.593298, 14.753010],
[29.590730, 29.849316, 29.661235],
[0.299483, 0.224117, 0.214859],
[9.179294, 4.006400, 3.993079],
[10.670255, 4.430619, 4.398151],
[20.355242, 8.688529, 8.802367],
[60.704942, 55.980810, 56.102004],
[2.940611, 0.889858, 0.897108],
[0.744909, 0.703389, 0.700635],
[2.906223, 0.887556, 0.895110],
[9.252901, 5.417411, 5.492932],
[12.345430, 12.032540, 11.790489],
[3.465527, 3.389873, 3.372103],
[4.263470, 3.611936, 3.615036],
[9.795959, 6.932302, 6.912054],
[48.752064, 49.549056, 49.086483],
[26.008925, 22.072019, 21.553101],
[27.521008, 25.469400, 25.627924],
[7.262501, 7.209355, 7.242376],
[1.880355, 1.138028, 1.171569],
[1.881614, 1.146638, 1.164525],
[1.750486, 0.956581, 1.048284],
[null, null, null],
[0.485363, 0.420291, 0.416819],
[0.372131, 0.322068, 0.323578],
[0.406606, 0.351536, 0.367830],
[0.314160, 0.290318, 0.271116],
[null, null, null]
]
}

0 comments on commit 8181fdc

Please sign in to comment.