Skip to content

Commit

Permalink
ARROW-18253: [C++][Parquet] Add additional bounds safety checks (#14592)
Browse files Browse the repository at this point in the history
Lead-authored-by: emkornfield <emkornfield@gmail.com>
Co-authored-by: Micah Kornfield <micahk@google.com>
Co-authored-by: Antoine Pitrou <pitrou@free.fr>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
3 people authored Nov 9, 2022
1 parent 28a1152 commit 147b5c9
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 12 deletions.
9 changes: 7 additions & 2 deletions cpp/src/parquet/arrow/path_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -861,8 +861,13 @@ class MultipathLevelBuilderImpl : public MultipathLevelBuilder {

::arrow::Status Write(int leaf_index, ArrowWriteContext* context,
CallbackFunction write_leaf_callback) override {
DCHECK_GE(leaf_index, 0);
DCHECK_LT(leaf_index, GetLeafCount());
if (ARROW_PREDICT_FALSE(leaf_index < 0 || leaf_index >= GetLeafCount())) {
return Status::Invalid("Column index out of bounds (got ", leaf_index,
", should be "
"between 0 and ",
GetLeafCount(), ")");
}

return WritePath(root_range_, &path_builder_->paths()[leaf_index], context,
std::move(write_leaf_callback));
}
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/parquet/arrow/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,15 @@ class FileReaderImpl : public FileReader {
const std::shared_ptr<std::unordered_set<int>>& included_leaves,
const std::vector<int>& row_groups,
std::unique_ptr<ColumnReaderImpl>* out) {
// Should be covered by GetRecordBatchReader checks but
// manifest_.schema_fields is a separate variable so be extra careful.
if (ARROW_PREDICT_FALSE(i < 0 ||
static_cast<size_t>(i) >= manifest_.schema_fields.size())) {
return Status::Invalid("Column index out of bounds (got ", i,
", should be "
"between 0 and ",
manifest_.schema_fields.size(), ")");
}
auto ctx = std::make_shared<ReaderContext>();
ctx->reader = reader_.get();
ctx->pool = pool_;
Expand Down
21 changes: 16 additions & 5 deletions cpp/src/parquet/metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,13 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
schema_(schema),
properties_(properties),
writer_version_(writer_version),
file_decryptor_(std::move(file_decryptor)) {}
file_decryptor_(std::move(file_decryptor)) {
if (ARROW_PREDICT_FALSE(row_group_->columns.size() >
static_cast<size_t>(std::numeric_limits<int>::max()))) {
throw ParquetException("Row group had too many columns: ",
row_group_->columns.size());
}
}

bool Equals(const RowGroupMetaDataImpl& other) const {
return *row_group_ == *other.row_group_;
Expand All @@ -457,10 +463,10 @@ class RowGroupMetaData::RowGroupMetaDataImpl {
inline const SchemaDescriptor* schema() const { return schema_; }

std::unique_ptr<ColumnChunkMetaData> ColumnChunk(int i) {
if (i < num_columns()) {
if (i >= 0 && i < num_columns()) {
return ColumnChunkMetaData::Make(&row_group_->columns[i], schema_->Column(i),
properties_, writer_version_, row_group_->ordinal,
static_cast<int16_t>(i), file_decryptor_);
i, file_decryptor_);
}
throw ParquetException("The file only has ", num_columns(),
" columns, requested metadata for column: ", i);
Expand Down Expand Up @@ -656,7 +662,7 @@ class FileMetaData::FileMetaDataImpl {
}

std::unique_ptr<RowGroupMetaData> RowGroup(int i) {
if (!(i < num_row_groups())) {
if (!(i >= 0 && i < num_row_groups())) {
std::stringstream ss;
ss << "The file only has " << num_row_groups()
<< " row groups, requested metadata for row group: " << i;
Expand Down Expand Up @@ -685,7 +691,12 @@ class FileMetaData::FileMetaDataImpl {
}

format::RowGroup& row_group(int i) {
DCHECK_LT(i, num_row_groups());
if (!(i >= 0 && i < num_row_groups())) {
std::stringstream ss;
ss << "The file only has " << num_row_groups()
<< " row groups, requested metadata for row group: " << i;
throw ParquetException(ss.str());
}
return metadata_->row_groups[i];
}

Expand Down
18 changes: 13 additions & 5 deletions cpp/src/parquet/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@
using parquet::format::SchemaElement;

namespace parquet {

namespace schema {

namespace {

void ThrowInvalidLogicalType(const LogicalType& logical_type) {
Expand All @@ -43,8 +40,19 @@ void ThrowInvalidLogicalType(const LogicalType& logical_type) {
throw ParquetException(ss.str());
}

void CheckColumnBounds(int column_index, size_t max_columns) {
if (ARROW_PREDICT_FALSE(column_index < 0 ||
static_cast<size_t>(column_index) >= max_columns)) {
std::stringstream ss;
ss << "Invalid Column Index: " << column_index << " Num columns: " << max_columns;
throw ParquetException(ss.str());
}
}

} // namespace

namespace schema {

// ----------------------------------------------------------------------
// ColumnPath

Expand Down Expand Up @@ -874,7 +882,7 @@ bool ColumnDescriptor::Equals(const ColumnDescriptor& other) const {
}

const ColumnDescriptor* SchemaDescriptor::Column(int i) const {
DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
CheckColumnBounds(i, leaves_.size());
return &leaves_[i];
}

Expand All @@ -899,7 +907,7 @@ int SchemaDescriptor::ColumnIndex(const Node& node) const {
}

const schema::Node* SchemaDescriptor::GetColumnRoot(int i) const {
DCHECK(i >= 0 && i < static_cast<int>(leaves_.size()));
CheckColumnBounds(i, leaves_.size());
return leaf_to_base_.find(i)->second.get();
}

Expand Down

0 comments on commit 147b5c9

Please sign in to comment.