From bbfc0d581324a4652873dbdb2549a23666d9d7c9 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Sat, 6 Feb 2016 12:08:31 -0800
Subject: [PATCH] PARQUET-507: Reduce the runtime of rle-test

I twiddled this a bit to cut the runtime in half. I'd like to reduce it
further but am looking for feedback -- my preference would be to use system
entropy (`std::random_device`) to seed the PRNG and print the seed on failure.
So we could run far fewer tests (e.g. only 50 or 100 or so) and occasionally
run into flakiness or failure if we refactor and break something internally.
Thoughts?

Author: Wes McKinney

Closes #37 from wesm/PARQUET-507 and squashes the following commits:

d75f2ed [Wes McKinney] Tidying per comments
0ed951a [Wes McKinney] Buglet
ba97491 [Wes McKinney] Further shorten random tests; use device entropy and print random seed on failure
a357dd1 [Wes McKinney] Preallocate vector in BitRle.Random and run half as many iterations

Change-Id: I8454ada4337cf98793294632e14ebe96f73c5111
---
NOTE(review): this copy of the patch lost the text inside angle brackets
(e-mail addresses and the #include targets on the context lines of the first
hunk). Those context lines are left as found; restore them from
cpp/src/parquet/util/rle-test.cc before applying. The added lines were revised
during review (decoder result is checked, decoder is bounded by the encoded
length, shadowed/unused locals removed), so the post-image blob id on the
"index" line is stale.

 cpp/src/parquet/util/rle-test.cc | 59 +++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/cpp/src/parquet/util/rle-test.cc b/cpp/src/parquet/util/rle-test.cc
index b2628e981d6e1..df020f511eb72 100644
--- a/cpp/src/parquet/util/rle-test.cc
+++ b/cpp/src/parquet/util/rle-test.cc
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <random>
 
 #include
 #include
@@ -204,6 +205,32 @@ void ValidateRle(const vector& values, int bit_width,
   }
 }
 
+// A version of ValidateRle that round-trips the values through an RleEncoder
+// and an RleDecoder. Returns false if a value cannot be encoded or decoded, or
+// if a decoded value differs from the value that was written.
+bool CheckRoundTrip(const vector<int>& values, int bit_width) {
+  const int len = 64 * 1024;
+  uint8_t buffer[len];
+  RleEncoder encoder(buffer, len, bit_width);
+  for (size_t i = 0; i < values.size(); ++i) {
+    if (!encoder.Put(values[i])) {
+      return false;
+    }
+  }
+  const int encoded_len = encoder.Flush();
+
+  // Hand the decoder only the bytes that the encoder actually produced.
+  RleDecoder decoder(buffer, encoded_len, bit_width);
+  for (size_t i = 0; i < values.size(); ++i) {
+    int out = 0;
+    // A failed Get() counts as a mismatch; do not compare a stale value.
+    if (!decoder.Get(&out) || values[i] != out) {
+      return false;
+    }
+  }
+  return true;
+}
+
 TEST(Rle, SpecificSequences) {
   const int len = 1024;
   uint8_t expected_buffer[len];
@@ -317,15 +344,27 @@ TEST(BitRle, Flush) {
 
 // Test some random sequences.
 TEST(BitRle, Random) {
-  int iters = 0;
-  while (iters < 1000) {
-    srand(iters++);
-    if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
-    vector values;
+  const int niters = 50;
+  const int ngroups = 1000;
+  const int max_group_size = 16;
+  // Reserve the worst case up front so that no iteration has to reallocate.
+  vector<int> values;
+  values.reserve(ngroups * max_group_size);
+
+  // prng setup; a fresh seed is drawn per iteration and reported on failure
+  std::random_device rd;
+  std::uniform_int_distribution<int> dist(1, 20);
+
+  for (int iter = 0; iter < niters; ++iter) {
+    // generate a seed with device entropy
+    const uint32_t seed = rd();
+    std::mt19937 gen(seed);
     bool parity = 0;
-    for (int i = 0; i < 1000; ++i) {
-      int group_size = rand() % 20 + 1;  // NOLINT
-      if (group_size > 16) {
+    values.clear();
+
+    for (int group = 0; group < ngroups; ++group) {
+      int group_size = dist(gen);
+      if (group_size > max_group_size) {
         group_size = 1;
       }
       for (int i = 0; i < group_size; ++i) {
@@ -333,7 +372,9 @@ TEST(BitRle, Random) {
       }
       parity = !parity;
     }
-    ValidateRle(values, (iters % MAX_WIDTH) + 1, NULL, -1);
+    if (!CheckRoundTrip(values, BitUtil::NumRequiredBits(values.size()))) {
+      FAIL() << "failing seed: " << seed;
+    }
   }
 }
 