Skip to content

Commit

Permalink
PARQUET-496: Fix cpplint configuration to catch more style errors
Browse files Browse the repository at this point in the history
We were using `--verbose=4` which catches only egregious style errors. `--verbose=2` with a few warning types suppressed is much more reasonable. This patch updates the cmake configuration and fixes the existing style errors to get the build passing.

Author: Wes McKinney <wes@cloudera.com>

Closes apache#33 from wesm/PARQUET-496 and squashes the following commits:

de674d4 [Wes McKinney] Use --verbose=2 for cpplint and fix outstanding style issues
  • Loading branch information
wesm committed Sep 2, 2018
1 parent ab5f61b commit ce2e746
Show file tree
Hide file tree
Showing 10 changed files with 22 additions and 22 deletions.
10 changes: 6 additions & 4 deletions cpp/src/parquet/column/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ ColumnReader::ColumnReader(const parquet::ColumnMetaData* metadata,
stream_(std::move(stream)),
num_buffered_values_(0),
num_decoded_values_(0) {

switch (metadata->codec) {
case CompressionCodec::UNCOMPRESSED:
break;
Expand Down Expand Up @@ -103,7 +102,8 @@ bool TypedColumnReader<TYPE>::ReadNewPage() {
PlainDecoder<TYPE> dictionary(schema_);
dictionary.SetData(current_page_header_.dictionary_page_header.num_values,
buffer, uncompressed_len);
std::shared_ptr<DecoderType> decoder(new DictionaryDecoder<TYPE>(schema_, &dictionary));
std::shared_ptr<DecoderType> decoder(
new DictionaryDecoder<TYPE>(schema_, &dictionary));

decoders_[Encoding::RLE_DICTIONARY] = decoder;
current_decoder_ = decoders_[Encoding::RLE_DICTIONARY].get();
Expand Down Expand Up @@ -222,9 +222,11 @@ std::shared_ptr<ColumnReader> ColumnReader::Make(const parquet::ColumnMetaData*
case Type::DOUBLE:
return std::make_shared<DoubleReader>(metadata, element, std::move(stream));
case Type::BYTE_ARRAY:
return std::make_shared<ByteArrayReader>(metadata, element, std::move(stream));
return std::make_shared<ByteArrayReader>(metadata, element,
std::move(stream));
case Type::FIXED_LEN_BYTE_ARRAY:
return std::make_shared<FixedLenByteArrayReader>(metadata, element, std::move(stream));
return std::make_shared<FixedLenByteArrayReader>(metadata, element,
std::move(stream));
default:
ParquetException::NYI("type reader not implemented");
}
Expand Down
1 change: 0 additions & 1 deletion cpp/src/parquet/column/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ class Scanner;

class ColumnReader {
public:

struct Config {
int batch_size;

Expand Down
1 change: 0 additions & 1 deletion cpp/src/parquet/column/scanner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ std::shared_ptr<Scanner> Scanner::Make(std::shared_ptr<ColumnReader> col_reader,
}
// Unreachable code, but supress compiler warning
return std::shared_ptr<Scanner>(nullptr);

}

} // namespace parquet_cpp
8 changes: 4 additions & 4 deletions cpp/src/parquet/column/scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ class TypedScanner : public Scanner {

bool NextLevels(int16_t* def_level, int16_t* rep_level) {
if (level_offset_ == levels_buffered_) {
levels_buffered_ = typed_reader_->ReadBatch(batch_size_, &def_levels_[0], &rep_levels_[0],
values_, &values_buffered_);
levels_buffered_ = typed_reader_->ReadBatch(batch_size_, &def_levels_[0],
&rep_levels_[0], values_, &values_buffered_);

// TODO: repetition levels

Expand Down Expand Up @@ -151,9 +151,9 @@ class TypedScanner : public Scanner {

if (is_null) {
std::string null_fmt = format_fwf<parquet::Type::BYTE_ARRAY>(width);
snprintf(buffer, 25, null_fmt.c_str(), "NULL");
snprintf(buffer, sizeof(buffer), null_fmt.c_str(), "NULL");
} else {
FormatValue(&val, buffer, 25, width);
FormatValue(&val, buffer, sizeof(buffer), width);
}
out << buffer;
}
Expand Down
2 changes: 0 additions & 2 deletions cpp/src/parquet/encodings/delta-bit-pack-encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class DeltaBitPackDecoder : public Decoder<TYPE> {

explicit DeltaBitPackDecoder(const parquet::SchemaElement* schema)
: Decoder<TYPE>(schema, parquet::Encoding::DELTA_BINARY_PACKED) {

parquet::Type::type type = type_traits<TYPE>::parquet_type;

if (type != parquet::Type::INT32 && type != parquet::Type::INT64) {
Expand Down Expand Up @@ -117,7 +116,6 @@ class DeltaBitPackDecoder : public Decoder<TYPE> {

int64_t last_value_;
};

} // namespace parquet_cpp

#endif
4 changes: 2 additions & 2 deletions cpp/src/parquet/encodings/plain-encoding.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ inline int PlainDecoder<parquet::Type::BYTE_ARRAY>::Decode(ByteArray* buffer,

// Template specialization for FIXED_LEN_BYTE_ARRAY
template <>
inline int PlainDecoder<parquet::Type::FIXED_LEN_BYTE_ARRAY>::Decode(FixedLenByteArray* buffer,
int max_values) {
inline int PlainDecoder<parquet::Type::FIXED_LEN_BYTE_ARRAY>::Decode(
FixedLenByteArray* buffer, int max_values) {
max_values = std::min(max_values, num_values_);
int len = schema_->type_length;
for (int i = 0; i < max_values; ++i) {
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/parquet/reader-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ TEST_F(TestAllTypesPlain, TestBatchRead) {
RowGroupReader* group = reader_.RowGroup(0);

// column 0, id
std::shared_ptr<Int32Reader> col = std::dynamic_pointer_cast<Int32Reader>(group->Column(0));
std::shared_ptr<Int32Reader> col =
std::dynamic_pointer_cast<Int32Reader>(group->Column(0));

int16_t def_levels[4];
int16_t rep_levels[4];
Expand Down Expand Up @@ -120,7 +121,7 @@ TEST_F(TestAllTypesPlain, DebugPrintWorks) {
reader_.DebugPrint(ss);

std::string result = ss.str();
ASSERT_TRUE(result.size() > 0);
ASSERT_GT(result.size(), 0);
}

} // namespace parquet_cpp
10 changes: 6 additions & 4 deletions cpp/src/parquet/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ std::shared_ptr<ColumnReader> RowGroupReader::Column(size_t i) {
source->Seek(col_start);

// TODO(wesm): Law of demeter violation
ScopedInMemoryInputStream* scoped_input = static_cast<ScopedInMemoryInputStream*>(input.get());
ScopedInMemoryInputStream* scoped_input =
static_cast<ScopedInMemoryInputStream*>(input.get());
size_t bytes_read = source->Read(scoped_input->size(), scoped_input->data());
if (bytes_read != scoped_input->size()) {
std::cout << "Bytes needed: " << col.meta_data.total_compressed_size << std::endl;
Expand Down Expand Up @@ -165,7 +166,8 @@ RowGroupReader* ParquetFileReader::RowGroup(size_t i) {
}

// Construct the RowGroupReader
row_group_readers_[i] = std::make_shared<RowGroupReader>(this, &metadata_.row_groups[i]);
row_group_readers_[i] = std::make_shared<RowGroupReader>(this,
&metadata_.row_groups[i]);
return row_group_readers_[i].get();
}

Expand Down Expand Up @@ -272,9 +274,9 @@ void ParquetFileReader::DebugPrint(std::ostream& stream, bool print_values) {
<< ": " << meta_data->num_values << " rows, "
<< meta_data->statistics.null_count << " null values, "
<< meta_data->statistics.distinct_count << " distinct values, "
<< "min value: " << (meta_data->statistics.min.length()>0 ?
<< "min value: " << (meta_data->statistics.min.length() > 0 ?
meta_data->statistics.min : "N/A")
<< ", max value: " << (meta_data->statistics.max.length()>0 ?
<< ", max value: " << (meta_data->statistics.max.length() > 0 ?
meta_data->statistics.max : "N/A") << ".\n";
}

Expand Down
1 change: 0 additions & 1 deletion cpp/src/parquet/util/bit-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,6 @@ class BitUtil {
static inline int16_t FromBigEndian(int16_t val) { return val; }
static inline uint16_t FromBigEndian(uint16_t val) { return val; }
#endif

};

} // namespace parquet_cpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/parquet/util/input_stream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace parquet_cpp {

InMemoryInputStream::InMemoryInputStream(const uint8_t* buffer, int64_t len) :
buffer_(buffer), len_(len), offset_(0) {}
buffer_(buffer), len_(len), offset_(0) {}

const uint8_t* InMemoryInputStream::Peek(int num_to_peek, int* num_bytes) {
*num_bytes = std::min(static_cast<int64_t>(num_to_peek), len_ - offset_);
Expand Down

0 comments on commit ce2e746

Please sign in to comment.