diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt new file mode 100644 index 0000000000000..906eaa6823f54 --- /dev/null +++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt @@ -0,0 +1,188 @@ +================================================================================================ +SQL Single Column Scan +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1192 / 1251 13.2 75.8 1.0X +Native ORC Vectorized 149 / 165 105.3 9.5 8.0X +Native ORC Vectorized with copy 146 / 156 107.7 9.3 8.2X +Hive built-in ORC 1419 / 1424 11.1 90.2 0.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1168 / 1176 13.5 74.2 1.0X +Native ORC Vectorized 151 / 156 104.2 9.6 7.7X +Native ORC Vectorized with copy 154 / 159 102.3 9.8 7.6X +Hive built-in ORC 1726 / 1742 9.1 109.7 0.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1203 / 1240 13.1 76.5 1.0X +Native ORC Vectorized 208 / 215 75.7 13.2 5.8X +Native ORC Vectorized with copy 212 / 220 74.3 13.5 5.7X +Hive built-in ORC 1782 / 1820 8.8 113.3 0.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on 
Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1226 / 1245 12.8 77.9 1.0X +Native ORC Vectorized 261 / 267 60.3 16.6 4.7X +Native ORC Vectorized with copy 263 / 269 59.8 16.7 4.7X +Hive built-in ORC 1804 / 1869 8.7 114.7 0.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1279 / 1283 12.3 81.3 1.0X +Native ORC Vectorized 297 / 304 52.9 18.9 4.3X +Native ORC Vectorized with copy 297 / 303 53.0 18.9 4.3X +Hive built-in ORC 2086 / 2089 7.5 132.6 0.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1396 / 1506 11.3 88.7 1.0X +Native ORC Vectorized 361 / 375 43.5 23.0 3.9X +Native ORC Vectorized with copy 361 / 381 43.6 22.9 3.9X +Hive built-in ORC 2108 / 2136 7.5 134.0 0.7X + + +================================================================================================ +Int and String Scan +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 3205 / 3216 3.3 305.7 1.0X +Native ORC Vectorized 1978 / 2216 5.3 
188.6 1.6X +Native ORC Vectorized with copy 2067 / 2155 5.1 197.1 1.6X +Hive built-in ORC 4373 / 4413 2.4 417.1 0.7X + + +================================================================================================ +Partitioned Table Scan +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Data column - Native ORC MR 1462 / 1487 10.8 93.0 1.0X +Data column - Native ORC Vectorized 269 / 272 58.5 17.1 5.4X +Data column - Native ORC Vectorized with copy 274 / 318 57.4 17.4 5.3X +Data column - Hive built-in ORC 2127 / 2138 7.4 135.2 0.7X +Partition column - Native ORC MR 1092 / 1098 14.4 69.4 1.3X +Partition column - Native ORC Vectorized 52 / 57 304.6 3.3 28.3X +Partition column - Native ORC Vectorized with copy 51 / 58 305.6 3.3 28.4X +Partition column - Hive built-in ORC 1349 / 1353 11.7 85.8 1.1X +Both columns - Native ORC MR 1472 / 1515 10.7 93.6 1.0X +Both columns - Native ORC Vectorized 309 / 328 51.0 19.6 4.7X +Both column - Native ORC Vectorized with copy 356 / 370 44.2 22.6 4.1X +Both columns - Hive built-in ORC 2175 / 2184 7.2 138.3 0.7X + + +================================================================================================ +Repeated String Scan +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1313 / 1323 8.0 125.2 1.0X +Native ORC Vectorized 258 / 271 40.7 24.6 5.1X +Native ORC 
Vectorized with copy 397 / 406 26.4 37.9 3.3X +Hive built-in ORC 1983 / 2009 5.3 189.2 0.7X + + +================================================================================================ +String with Nulls Scan +================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 2492 / 2721 4.2 237.7 1.0X +Native ORC Vectorized 775 / 807 13.5 73.9 3.2X +Native ORC Vectorized with copy 1009 / 1105 10.4 96.3 2.5X +Hive built-in ORC 3890 / 4018 2.7 371.0 0.6X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +String with Nulls Scan (0.5%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 2278 / 2322 4.6 217.3 1.0X +Native ORC Vectorized 941 / 950 11.1 89.7 2.4X +Native ORC Vectorized with copy 1201 / 1211 8.7 114.6 1.9X +Hive built-in ORC 2903 / 2931 3.6 276.9 0.8X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +String with Nulls Scan (0.95%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1237 / 1247 8.5 118.0 1.0X +Native ORC Vectorized 334 / 340 31.4 31.9 3.7X +Native ORC Vectorized with copy 412 / 415 25.5 39.3 3.0X +Hive built-in ORC 1656 / 1697 6.3 158.0 0.7X + + +================================================================================================ +Single Column Scan From Wide Columns 
+================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 1105 / 1111 0.9 1053.7 1.0X +Native ORC Vectorized 129 / 135 8.1 122.9 8.6X +Native ORC Vectorized with copy 126 / 140 8.3 120.5 8.7X +Hive built-in ORC 371 / 378 2.8 354.1 3.0X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Single Column Scan from 200 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 2150 / 2169 0.5 2049.9 1.0X +Native ORC Vectorized 231 / 238 4.5 219.8 9.3X +Native ORC Vectorized with copy 225 / 239 4.7 214.9 9.5X +Hive built-in ORC 578 / 588 1.8 551.0 3.7X + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_162-b12 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz + +Single Column Scan from 300 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Native ORC MR 3379 / 3414 0.3 3222.6 1.0X +Native ORC Vectorized 375 / 395 2.8 358.1 9.0X +Native ORC Vectorized with copy 390 / 407 2.7 372.1 8.7X +Hive built-in ORC 838 / 846 1.3 799.1 4.0X + + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala index 49de007df3828..26e0e9aea51e5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -22,20 +22,26 @@ import 
java.io.File import scala.util.Random import org.apache.spark.SparkConf -import org.apache.spark.benchmark.Benchmark +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ - /** * Benchmark to measure ORC read performance. + * {{{ + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class <this class> <test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/OrcReadBenchmark-results.txt". + * }}} * * This is in `sql/hive` module in order to compare `sql/core` and `sql/hive` ORC data sources. */ // scalastyle:off line.size.limit -object OrcReadBenchmark extends SQLHelper { +object OrcReadBenchmark extends BenchmarkBase with SQLHelper { val conf = new SparkConf() conf.set("orc.compression", "snappy") @@ -69,7 +75,7 @@ object OrcReadBenchmark extends SQLHelper { } def numericScanBenchmark(values: Int, dataType: DataType): Unit = { - val benchmark = new Benchmark(s"SQL Single ${dataType.sql} Column Scan", values) + val benchmark = new Benchmark(s"SQL Single ${dataType.sql} Column Scan", values, output = output) withTempPath { dir => withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { @@ -98,59 +104,13 @@ object OrcReadBenchmark extends SQLHelper { spark.sql("SELECT sum(id) FROM hiveOrcTable").collect() } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 1135 / 1171 13.9 72.2 1.0X - Native ORC Vectorized 152 / 163 103.4 9.7 7.5X - Native ORC Vectorized with copy 149 / 162 105.4 9.5 7.6X - Hive built-in 
ORC 1380 / 1384 11.4 87.7 0.8X - - SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 1182 / 1244 13.3 75.2 1.0X - Native ORC Vectorized 145 / 156 108.7 9.2 8.2X - Native ORC Vectorized with copy 148 / 158 106.4 9.4 8.0X - Hive built-in ORC 1591 / 1636 9.9 101.2 0.7X - - SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 1271 / 1271 12.4 80.8 1.0X - Native ORC Vectorized 206 / 212 76.3 13.1 6.2X - Native ORC Vectorized with copy 200 / 213 78.8 12.7 6.4X - Hive built-in ORC 1776 / 1787 8.9 112.9 0.7X - - SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 1344 / 1355 11.7 85.4 1.0X - Native ORC Vectorized 258 / 268 61.0 16.4 5.2X - Native ORC Vectorized with copy 252 / 257 62.4 16.0 5.3X - Hive built-in ORC 1818 / 1823 8.7 115.6 0.7X - - SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 1333 / 1352 11.8 84.8 1.0X - Native ORC Vectorized 310 / 324 50.7 19.7 4.3X - Native ORC Vectorized with copy 312 / 320 50.4 19.9 4.3X - Hive built-in ORC 1904 / 1918 8.3 121.0 0.7X - - SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 1408 / 1585 11.2 89.5 1.0X - Native ORC Vectorized 359 / 368 43.8 22.8 3.9X - Native ORC Vectorized with copy 364 / 371 43.2 23.2 3.9X - Hive built-in ORC 1881 / 1954 8.4 119.6 0.7X - */ benchmark.run() } } } def intStringScanBenchmark(values: 
Int): Unit = { - val benchmark = new Benchmark("Int and String Scan", values) + val benchmark = new Benchmark("Int and String Scan", values, output = output) withTempPath { dir => withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { @@ -181,24 +141,13 @@ object OrcReadBenchmark extends SQLHelper { spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").collect() } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 2566 / 2592 4.1 244.7 1.0X - Native ORC Vectorized 1098 / 1113 9.6 104.7 2.3X - Native ORC Vectorized with copy 1527 / 1593 6.9 145.6 1.7X - Hive built-in ORC 3561 / 3705 2.9 339.6 0.7X - */ benchmark.run() } } } def partitionTableScanBenchmark(values: Int): Unit = { - val benchmark = new Benchmark("Partitioned Table", values) + val benchmark = new Benchmark("Partitioned Table", values, output = output) withTempPath { dir => withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { @@ -267,32 +216,13 @@ object OrcReadBenchmark extends SQLHelper { spark.sql("SELECT sum(p), sum(id) FROM hiveOrcTable").collect() } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Data only - Native ORC MR 1447 / 1457 10.9 92.0 1.0X - Data only - Native ORC Vectorized 256 / 266 61.4 16.3 5.6X - Data only - Native ORC Vectorized with copy 263 / 273 59.8 16.7 5.5X - Data only - Hive built-in ORC 1960 / 1988 8.0 124.6 0.7X - Partition only - Native ORC MR 1039 / 1043 15.1 66.0 1.4X - Partition only - Native ORC Vectorized 48 / 53 326.6 3.1 30.1X - Partition only - Native ORC 
Vectorized with copy 48 / 53 328.4 3.0 30.2X - Partition only - Hive built-in ORC 1234 / 1242 12.7 78.4 1.2X - Both columns - Native ORC MR 1465 / 1475 10.7 93.1 1.0X - Both columns - Native ORC Vectorized 292 / 301 53.9 18.6 5.0X - Both column - Native ORC Vectorized with copy 348 / 354 45.1 22.2 4.2X - Both columns - Hive built-in ORC 2051 / 2060 7.7 130.4 0.7X - */ benchmark.run() } } } def repeatedStringScanBenchmark(values: Int): Unit = { - val benchmark = new Benchmark("Repeated String", values) + val benchmark = new Benchmark("Repeated String", values, output = output) withTempPath { dir => withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { @@ -320,17 +250,6 @@ object OrcReadBenchmark extends SQLHelper { spark.sql("SELECT sum(length(c1)) FROM hiveOrcTable").collect() } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 1271 / 1278 8.3 121.2 1.0X - Native ORC Vectorized 200 / 212 52.4 19.1 6.4X - Native ORC Vectorized with copy 342 / 347 30.7 32.6 3.7X - Hive built-in ORC 1874 / 2105 5.6 178.7 0.7X - */ benchmark.run() } } @@ -347,7 +266,8 @@ object OrcReadBenchmark extends SQLHelper { s"SELECT IF(RAND(1) < $fractionOfNulls, NULL, CAST(id as STRING)) AS c1, " + s"IF(RAND(2) < $fractionOfNulls, NULL, CAST(id as STRING)) AS c2 FROM t1")) - val benchmark = new Benchmark(s"String with Nulls Scan ($fractionOfNulls%)", values) + val benchmark = + new Benchmark(s"String with Nulls Scan ($fractionOfNulls%)", values, output = output) benchmark.addCase("Native ORC MR") { _ => withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { @@ -373,38 +293,13 @@ object OrcReadBenchmark extends SQLHelper { "WHERE c1 IS NOT NULL AND c2 IS NOT NULL").collect() } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on 
Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 2394 / 2886 4.4 228.3 1.0X - Native ORC Vectorized 699 / 729 15.0 66.7 3.4X - Native ORC Vectorized with copy 959 / 1025 10.9 91.5 2.5X - Hive built-in ORC 3899 / 3901 2.7 371.9 0.6X - - String with Nulls Scan (0.5%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 2234 / 2255 4.7 213.1 1.0X - Native ORC Vectorized 854 / 869 12.3 81.4 2.6X - Native ORC Vectorized with copy 1099 / 1128 9.5 104.8 2.0X - Hive built-in ORC 2767 / 2793 3.8 263.9 0.8X - - String with Nulls Scan (0.95%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 1166 / 1202 9.0 111.2 1.0X - Native ORC Vectorized 338 / 345 31.1 32.2 3.5X - Native ORC Vectorized with copy 418 / 428 25.1 39.9 2.8X - Hive built-in ORC 1730 / 1761 6.1 164.9 0.7X - */ benchmark.run() } } } def columnsBenchmark(values: Int, width: Int): Unit = { - val benchmark = new Benchmark(s"Single Column Scan from $width columns", values) + val benchmark = new Benchmark(s"Single Column Scan from $width columns", values, output = output) withTempPath { dir => withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { @@ -436,49 +331,36 @@ object OrcReadBenchmark extends SQLHelper { spark.sql(s"SELECT sum(c$middle) FROM hiveOrcTable").collect() } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native 
ORC MR 1050 / 1053 1.0 1001.1 1.0X - Native ORC Vectorized 95 / 101 11.0 90.9 11.0X - Native ORC Vectorized with copy 95 / 102 11.0 90.9 11.0X - Hive built-in ORC 348 / 358 3.0 331.8 3.0X - - Single Column Scan from 200 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 2099 / 2108 0.5 2002.1 1.0X - Native ORC Vectorized 179 / 187 5.8 171.1 11.7X - Native ORC Vectorized with copy 176 / 188 6.0 167.6 11.9X - Hive built-in ORC 562 / 581 1.9 535.9 3.7X - - Single Column Scan from 300 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Native ORC MR 3221 / 3246 0.3 3071.4 1.0X - Native ORC Vectorized 312 / 322 3.4 298.0 10.3X - Native ORC Vectorized with copy 306 / 320 3.4 291.6 10.5X - Hive built-in ORC 815 / 824 1.3 777.3 4.0X - */ benchmark.run() } } } - def main(args: Array[String]): Unit = { - Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType).foreach { dataType => - numericScanBenchmark(1024 * 1024 * 15, dataType) + override def benchmark(): Unit = { + runBenchmark("SQL Single Column Scan") { + Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType).foreach { dataType => + numericScanBenchmark(1024 * 1024 * 15, dataType) + } + } + runBenchmark("Int and String Scan") { + intStringScanBenchmark(1024 * 1024 * 10) + } + runBenchmark("Partitioned Table Scan") { + partitionTableScanBenchmark(1024 * 1024 * 15) + } + runBenchmark("Repeated String Scan") { + repeatedStringScanBenchmark(1024 * 1024 * 10) + } + runBenchmark("String with Nulls Scan") { + for (fractionOfNulls <- List(0.0, 0.50, 0.95)) { + stringWithNullsScanBenchmark(1024 * 1024 * 10, fractionOfNulls) + } } - intStringScanBenchmark(1024 * 1024 * 10) - partitionTableScanBenchmark(1024 * 1024 * 15) - repeatedStringScanBenchmark(1024 * 1024 * 10) - for 
(fractionOfNulls <- List(0.0, 0.50, 0.95)) { - stringWithNullsScanBenchmark(1024 * 1024 * 10, fractionOfNulls) + runBenchmark("Single Column Scan From Wide Columns") { + columnsBenchmark(1024 * 1024 * 1, 100) + columnsBenchmark(1024 * 1024 * 1, 200) + columnsBenchmark(1024 * 1024 * 1, 300) } - columnsBenchmark(1024 * 1024 * 1, 100) - columnsBenchmark(1024 * 1024 * 1, 200) - columnsBenchmark(1024 * 1024 * 1, 300) } } // scalastyle:on line.size.limit