Skip to content

Commit

Permalink
PARQUET-507: Reduce the runtime of rle-test
Browse files Browse the repository at this point in the history
I twiddled this a bit to cut the runtime in half. I'd like to reduce it further, but I'm looking for feedback first -- my preference would be to use system entropy (`std::random_device`) to seed the PRNG and print the seed on failure. That way we could run far fewer tests (e.g. only 50 or 100 or so) and would only occasionally run into flakiness or a failure if we refactor and break something internally. Thoughts?

Author: Wes McKinney <wes@cloudera.com>

Closes apache#37 from wesm/PARQUET-507 and squashes the following commits:

d75f2ed [Wes McKinney] Tidying per comments
0ed951a [Wes McKinney] Buglet
ba97491 [Wes McKinney] Further shorten random tests; use device entropy and print random seed on failure
a357dd1 [Wes McKinney] Preallocate vector in BitRle.Random and run half as many iterations

Change-Id: I8454ada4337cf98793294632e14ebe96f73c5111
  • Loading branch information
wesm authored and julienledem committed Feb 6, 2016
1 parent 94257f8 commit bbfc0d5
Showing 1 changed file with 50 additions and 9 deletions.
59 changes: 50 additions & 9 deletions cpp/src/parquet/util/rle-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <stdio.h>
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

#include <boost/utility.hpp>
Expand Down Expand Up @@ -204,6 +205,32 @@ void ValidateRle(const vector<int>& values, int bit_width,
}
}

// A version of ValidateRle that round-trips the values and returns false if
// the returned values are not all the same
bool CheckRoundTrip(const vector<int>& values, int bit_width) {
const int len = 64 * 1024;
uint8_t buffer[len];
RleEncoder encoder(buffer, len, bit_width);
for (int i = 0; i < values.size(); ++i) {
bool result = encoder.Put(values[i]);
if (!result) {
return false;
}
}
int encoded_len = encoder.Flush();
int out;

RleDecoder decoder(buffer, len, bit_width);
for (int i = 0; i < values.size(); ++i) {
uint64_t val;
bool result = decoder.Get(&out);
if (values[i] != out) {
return false;
}
}
return true;
}

TEST(Rle, SpecificSequences) {
const int len = 1024;
uint8_t expected_buffer[len];
Expand Down Expand Up @@ -317,23 +344,37 @@ TEST(BitRle, Flush) {

// Test some random sequences.
//
// NOTE(review): this span looks like a rendered diff of the test, so lines of
// the earlier srand()/rand() loop appear interleaved with their
// std::random_device / std::mt19937 replacements. As shown the braces do not
// balance, so it should not be read as one compilable body -- confirm against
// the actual file.
//
// In both variants the input is a sequence of runs of alternating 0/1
// (`parity`). Each run length is drawn from [1, 20], and any draw above 16 is
// replaced by a run of length 1.
TEST(BitRle, Random) {
int iters = 0;
while (iters < 1000) {
// Seeds rand() with the loop counter, so each pass uses a fixed seed.
srand(iters++);
// With the loop bound above, iters stays within 1..1000 here, so this
// condition is never true.
if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
vector<int> values;
size_t niters = 50;
size_t ngroups = 1000;
size_t max_group_size = 16;
// Constructs the vector with ngroups + max_group_size elements; it is emptied
// with resize(0) below before any values are appended.
vector<int> values(ngroups + max_group_size);

// prng setup
std::random_device rd;
// Inclusive range [1, 20]; draws above max_group_size are collapsed to 1 below.
std::uniform_int_distribution<int> dist(1, 20);

// NOTE(review): the `seed` declared inside the loop below hides this one, so
// this outer variable looks unused -- confirm.
uint32_t seed = 0;
for (int iter = 0; iter < niters; ++iter) {
// generate a seed with device entropy
// A new seed is drawn on every iteration, so a run can only be repeated if
// the seed is known (see the FAIL() message below).
uint32_t seed = rd();
std::mt19937 gen(seed);

bool parity = 0;
for (int i = 0; i < 1000; ++i) {
int group_size = rand() % 20 + 1; // NOLINT
if (group_size > 16) {
values.resize(0);

for (int i = 0; i < ngroups; ++i) {
int group_size = dist(gen);
if (group_size > max_group_size) {
group_size = 1;
}
// This inner `i` hides the loop index of the enclosing loop.
for (int i = 0; i < group_size; ++i) {
values.push_back(parity);
}
// Flip the value so consecutive runs alternate.
parity = !parity;
}
ValidateRle(values, (iters % MAX_WIDTH) + 1, NULL, -1);
// NOTE(review): the bit width passed here is computed from the number of
// values, not from the values themselves (which are only 0 or 1) -- confirm
// this is intended.
if (!CheckRoundTrip(values, BitUtil::NumRequiredBits(values.size()))) {
// Include the seed in the failure message so the sequence can be regenerated.
FAIL() << "failing seed: " << seed;
}
}
}

Expand Down

0 comments on commit bbfc0d5

Please sign in to comment.