diff --git a/cpp/src/parquet/encodings/dictionary-encoding.h b/cpp/src/parquet/encodings/dictionary-encoding.h index 19ef1ea7db757..eba9b49ff3ac2 100644 --- a/cpp/src/parquet/encodings/dictionary-encoding.h +++ b/cpp/src/parquet/encodings/dictionary-encoding.h @@ -27,6 +27,7 @@ #include "parquet/encodings/decoder.h" #include "parquet/encodings/encoder.h" #include "parquet/encodings/plain-encoding.h" +#include "parquet/util/cpu-info.h" #include "parquet/util/dict-encoding.h" #include "parquet/util/hash-util.h" #include "parquet/util/mem-pool.h" @@ -203,7 +204,11 @@ class DictEncoderBase { mod_bitmask_(hash_table_size_ - 1), hash_slots_(hash_table_size_, HASH_SLOT_EMPTY), pool_(pool), - dict_encoded_size_(0) {} + dict_encoded_size_(0) { + if (!CpuInfo::initialized()) { + CpuInfo::Init(); + } + } /// Size of the table. Must be a power of 2. int hash_table_size_; @@ -426,6 +431,8 @@ inline int DictEncoderBase::WriteIndices(uint8_t* buffer, int buffer_len) { if (!encoder.Put(index)) return -1; } encoder.Flush(); + + ClearIndices(); return 1 + encoder.len(); } diff --git a/cpp/src/parquet/encodings/encoding-test.cc b/cpp/src/parquet/encodings/encoding-test.cc index f060f967bc5c6..f45736a21ef66 100644 --- a/cpp/src/parquet/encodings/encoding-test.cc +++ b/cpp/src/parquet/encodings/encoding-test.cc @@ -155,6 +155,10 @@ class TestEncodingBase : public ::testing::Test { } } + void TearDown() { + pool_.FreeAll(); + } + void InitData(int nvalues, int repeats) { num_values_ = nvalues * repeats; input_bytes_.resize(num_values_ * sizeof(T)); diff --git a/cpp/src/parquet/util/bit-stream-utils.inline.h b/cpp/src/parquet/util/bit-stream-utils.inline.h index e0dcab871fc54..fc90244947c6b 100644 --- a/cpp/src/parquet/util/bit-stream-utils.inline.h +++ b/cpp/src/parquet/util/bit-stream-utils.inline.h @@ -90,7 +90,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) { DCHECK(buffer_ != NULL); // TODO: revisit this limit if necessary DCHECK_LE(num_bits, 32); - DCHECK_LE(num_bits, 
sizeof(T) * 8); + DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8)); if (UNLIKELY(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false; @@ -118,7 +118,7 @@ inline bool BitReader::GetValue(int num_bits, T* v) { template <typename T> inline bool BitReader::GetAligned(int num_bytes, T* v) { - DCHECK_LE(num_bytes, sizeof(T)); + DCHECK_LE(num_bytes, static_cast<int>(sizeof(T))); int bytes_read = BitUtil::Ceil(bit_offset_, 8); if (UNLIKELY(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false; diff --git a/cpp/src/parquet/util/bit-util-test.cc b/cpp/src/parquet/util/bit-util-test.cc index 5ea4c119f7b46..90c1167349d66 100644 --- a/cpp/src/parquet/util/bit-util-test.cc +++ b/cpp/src/parquet/util/bit-util-test.cc @@ -32,6 +32,12 @@ namespace parquet_cpp { +static void ensure_cpu_info_initialized() { + if (!CpuInfo::initialized()) { + CpuInfo::Init(); + } +} + TEST(BitUtil, Ceil) { EXPECT_EQ(BitUtil::Ceil(0, 1), 0); EXPECT_EQ(BitUtil::Ceil(1, 1), 1); @@ -71,6 +77,8 @@ TEST(BitUtil, RoundDown) { } TEST(BitUtil, Popcount) { + ensure_cpu_info_initialized(); + EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4); EXPECT_EQ(BitUtil::PopcountNoHw(BOOST_BINARY(0 1 0 1 0 1 0 1)), 4); EXPECT_EQ(BitUtil::Popcount(BOOST_BINARY(1 1 1 1 0 1 0 1)), 6); diff --git a/cpp/src/parquet/util/cpu-info.h b/cpp/src/parquet/util/cpu-info.h index 9026cde6630ec..e293418733c43 100644 --- a/cpp/src/parquet/util/cpu-info.h +++ b/cpp/src/parquet/util/cpu-info.h @@ -93,6 +93,10 @@ class CpuInfo { return model_name_; } + static bool initialized() { + return initialized_; + } + private: static bool initialized_; static int64_t hardware_flags_; diff --git a/cpp/src/parquet/util/logging.h b/cpp/src/parquet/util/logging.h index 30943730ffc11..4c93f8636604d 100644 --- a/cpp/src/parquet/util/logging.h +++ b/cpp/src/parquet/util/logging.h @@ -20,14 +20,90 @@ #include <iostream> -#define DCHECK(condition) while (false) std::cout -#define DCHECK_EQ(a, b) while (false) std::cout -#define DCHECK_NE(a, b) 
while (false) std::cout -#define DCHECK_GT(a, b) while (false) std::cout -#define DCHECK_LT(a, b) while (false) std::cout -#define DCHECK_GE(a, b) while (false) std::cout -#define DCHECK_LE(a, b) while (false) std::cout -// Similar to how glog defines DCHECK for release. -#define LOG(level) while (false) std::cout - -#endif +namespace parquet_cpp { + +// Stubbed versions of macros defined in glog/logging.h, intended for +// environments where glog headers aren't available. +// +// Add more as needed. + +// Log levels. LOG ignores them, so their values are arbitrary. + +#define PARQUET_INFO 0 +#define PARQUET_WARNING 1 +#define PARQUET_ERROR 2 +#define PARQUET_FATAL 3 + +#define PARQUET_LOG_INTERNAL(level) parquet_cpp::internal::CerrLog(level) +#define PARQUET_LOG(level) PARQUET_LOG_INTERNAL(PARQUET_##level) + +#define PARQUET_CHECK(condition) \ + (condition) ? 0 : PARQUET_LOG(FATAL) << "Check failed: " #condition " " + +#ifdef NDEBUG +#define PARQUET_DFATAL PARQUET_WARNING + +#define DCHECK(condition) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_EQ(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_NE(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_LE(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_LT(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_GE(val1, val2) while (false) parquet_cpp::internal::NullLog() +#define DCHECK_GT(val1, val2) while (false) parquet_cpp::internal::NullLog() + +#else +#define PARQUET_DFATAL PARQUET_FATAL + +#define DCHECK(condition) PARQUET_CHECK(condition) +#define DCHECK_EQ(val1, val2) PARQUET_CHECK((val1) == (val2)) +#define DCHECK_NE(val1, val2) PARQUET_CHECK((val1) != (val2)) +#define DCHECK_LE(val1, val2) PARQUET_CHECK((val1) <= (val2)) +#define DCHECK_LT(val1, val2) PARQUET_CHECK((val1) < (val2)) +#define DCHECK_GE(val1, val2) PARQUET_CHECK((val1) >= (val2)) +#define DCHECK_GT(val1, val2) PARQUET_CHECK((val1) > 
(val2)) + +#endif // NDEBUG + +namespace internal { + +class NullLog { + public: + template <typename T> + NullLog& operator<<(const T& t) { + return *this; + } +}; + +class CerrLog { + public: + CerrLog(int severity) // NOLINT(runtime/explicit) + : severity_(severity), + has_logged_(false) { + } + + ~CerrLog() { + if (has_logged_) { + std::cerr << std::endl; + } + if (severity_ == PARQUET_FATAL) { + exit(1); + } + } + + template <typename T> + CerrLog& operator<<(const T& t) { + has_logged_ = true; + std::cerr << t; + return *this; + } + + private: + const int severity_; + bool has_logged_; +}; + +} // namespace internal + +} // namespace parquet_cpp + +#endif // PARQUET_UTIL_LOGGING_H diff --git a/cpp/src/parquet/util/mem-pool.h b/cpp/src/parquet/util/mem-pool.h index 88a8715509c1c..3f21aa7793246 100644 --- a/cpp/src/parquet/util/mem-pool.h +++ b/cpp/src/parquet/util/mem-pool.h @@ -197,7 +197,7 @@ class MemPool { DCHECK_LE(info.allocated_bytes + num_bytes, info.size); info.allocated_bytes += num_bytes; total_allocated_bytes_ += num_bytes; - DCHECK_LE(current_chunk_idx_, chunks_.size() - 1); + DCHECK_LE(current_chunk_idx_, static_cast<int>(chunks_.size()) - 1); peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_); return result; } diff --git a/cpp/src/parquet/util/rle-encoding.h b/cpp/src/parquet/util/rle-encoding.h index b8dcc8e1c671e..ca9fa4f3ff8b9 100644 --- a/cpp/src/parquet/util/rle-encoding.h +++ b/cpp/src/parquet/util/rle-encoding.h @@ -292,7 +292,7 @@ bool RleDecoder::NextCounts() { /// This function buffers input values 8 at a time. After seeing all 8 values, /// it decides whether they should be encoded as a literal or repeated run. inline bool RleEncoder::Put(uint64_t value) { - DCHECK(bit_width_ == 64 || value < (1LL << bit_width_)); + DCHECK(bit_width_ == 64 || value < (1ULL << bit_width_)); if (UNLIKELY(buffer_full_)) return false; if (LIKELY(current_value_ == value)) {