Skip to content

Commit

Permalink
Merge pull request #10555 from jhump/jh/fix-consistency-with-very-lar…
Browse files Browse the repository at this point in the history
…ge-decimal-numbers

protoc: fix consistency with parsing very large decimal numbers
  • Loading branch information
fowles authored Sep 15, 2022
2 parents 0d0164f + 7e745c4 commit c4644b7
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 48 deletions.
44 changes: 31 additions & 13 deletions src/google/protobuf/compiler/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,16 @@ bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
}
}

bool Parser::TryConsumeInteger64(uint64_t max_value, uint64_t* output) {
if (LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
io::Tokenizer::ParseInteger(input_->current().text, max_value,
output)) {
input_->Next();
return true;
}
return false;
}

bool Parser::ConsumeNumber(double* output, const char* error) {
if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
*output = io::Tokenizer::ParseFloat(input_->current().text);
Expand All @@ -296,13 +306,19 @@ bool Parser::ConsumeNumber(double* output, const char* error) {
} else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
// Also accept integers.
uint64_t value = 0;
if (!io::Tokenizer::ParseInteger(input_->current().text,
if (io::Tokenizer::ParseInteger(input_->current().text,
std::numeric_limits<uint64_t>::max(),
&value)) {
*output = value;
} else if (input_->current().text[0] == '0') {
// octal or hexadecimal; don't bother parsing as float
AddError("Integer out of range.");
// We still return true because we did, in fact, parse a number.
} else if (!io::Tokenizer::TryParseFloat(input_->current().text, output)) {
// out of int range, and not valid float? 🤷
AddError("Integer out of range.");
// We still return true because we did, in fact, parse a number.
}
*output = value;
input_->Next();
return true;
} else if (LookingAt("inf")) {
Expand Down Expand Up @@ -1575,18 +1591,20 @@ bool Parser::ParseOption(Message* options,
is_negative
? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
: std::numeric_limits<uint64_t>::max();
DO(ConsumeInteger64(max_value, &value, "Expected integer."));
if (is_negative) {
value_location.AddPath(
UninterpretedOption::kNegativeIntValueFieldNumber);
uninterpreted_option->set_negative_int_value(
static_cast<int64_t>(0 - value));
} else {
value_location.AddPath(
UninterpretedOption::kPositiveIntValueFieldNumber);
uninterpreted_option->set_positive_int_value(value);
if (TryConsumeInteger64(max_value, &value)) {
if (is_negative) {
value_location.AddPath(
UninterpretedOption::kNegativeIntValueFieldNumber);
uninterpreted_option->set_negative_int_value(
static_cast<int64_t>(0 - value));
} else {
value_location.AddPath(
UninterpretedOption::kPositiveIntValueFieldNumber);
uninterpreted_option->set_positive_int_value(value);
}
break;
}
break;
// value too large for an integer; fall through below to treat as floating point
}

case io::Tokenizer::TYPE_FLOAT: {
Expand Down
3 changes: 3 additions & 0 deletions src/google/protobuf/compiler/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ class PROTOBUF_EXPORT Parser {
// is greater than max_value, an error will be reported.
bool ConsumeInteger64(uint64_t max_value, uint64_t* output,
const char* error);
// Try to consume a 64-bit integer and store its value in "output". No
// error is reported on failure, allowing caller to consume token another way.
bool TryConsumeInteger64(uint64_t max_value, uint64_t* output);
// Consume a number and store its value in "output". This will accept
// tokens of either INTEGER or FLOAT type.
bool ConsumeNumber(double* output, const char* error);
Expand Down
66 changes: 66 additions & 0 deletions src/google/protobuf/compiler/parser_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,56 @@ TEST_F(ParseMessageTest, FieldOptions) {
"}");
}

TEST_F(ParseMessageTest, FieldOptionsSupportLargeDecimalLiteral) {
// decimal integer literal > uint64 max
ExpectParsesTo(
"import \"google/protobuf/descriptor.proto\";\n"
"extend google.protobuf.FieldOptions {\n"
" optional double f = 10101;\n"
"}\n"
"message TestMessage {\n"
" optional double a = 1 [default = 18446744073709551616];\n"
" optional double b = 2 [default = -18446744073709551616];\n"
" optional double c = 3 [(f) = 18446744073709551616];\n"
" optional double d = 4 [(f) = -18446744073709551616];\n"
"}\n",

"dependency: \"google/protobuf/descriptor.proto\""
"extension {"
" name: \"f\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 10101"
" extendee: \"google.protobuf.FieldOptions\""
"}"
"message_type {"
" name: \"TestMessage\""
" field {"
" name: \"a\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 1"
" default_value: \"1.8446744073709552e+19\""
" }"
" field {"
" name: \"b\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 2"
" default_value: \"-1.8446744073709552e+19\""
" }"
" field {"
" name: \"c\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 3"
" options{"
" uninterpreted_option{"
" name{ name_part: \"f\" is_extension: true }"
" double_value: 1.8446744073709552e+19"
" }"
" }"
" }"
" field {"
" name: \"d\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 4"
" options{"
" uninterpreted_option{"
" name{ name_part: \"f\" is_extension: true }"
" double_value: -1.8446744073709552e+19"
" }"
" }"
" }"
"}");
}

TEST_F(ParseMessageTest, Oneof) {
ExpectParsesTo(
"message TestMessage {\n"
Expand Down Expand Up @@ -1891,6 +1941,22 @@ TEST_F(ParserValidationErrorTest, FieldDefaultValueError) {
"2:32: Enum type \"Baz\" has no value named \"NO_SUCH_VALUE\".\n");
}

TEST_F(ParserValidationErrorTest, FieldDefaultIntegerOutOfRange) {
ExpectHasErrors(
"message Foo {\n"
" optional double bar = 1 [default = 0x10000000000000000];\n"
"}\n",
"1:37: Integer out of range.\n");
}

TEST_F(ParserValidationErrorTest, FieldOptionOutOfRange) {
ExpectHasErrors(
"message Foo {\n"
" optional double bar = 1 [foo = 0x10000000000000000];\n"
"}\n",
"1:33: Integer out of range.\n");
}

TEST_F(ParserValidationErrorTest, FileOptionNameError) {
ExpectHasValidationErrors(
"option foo = 5;",
Expand Down
50 changes: 30 additions & 20 deletions src/google/protobuf/descriptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "google/protobuf/stubs/stringprintf.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
Expand Down Expand Up @@ -7675,6 +7676,27 @@ bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
return true;
}

namespace {
// Helpers for method below

template <typename T> std::string ValueOutOfRange(
absl::string_view type_name, absl::string_view option_name) {
return absl::StrFormat(
"Value out of range, %d to %d, for %s option \"%s\".", \
std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
type_name, option_name);
}

template <typename T> std::string ValueMustBeInt(
absl::string_view type_name, absl::string_view option_name) {
return absl::StrFormat(
"Value must be integer, from %d to %d, for %s option \"%s\".", \
std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
type_name, option_name);
}

} // namespace

bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
// We switch on the CppType to validate.
Expand All @@ -7683,8 +7705,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() >
static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
return AddValueError("Value out of range for int32 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
} else {
SetInt32(option_field->number(),
uninterpreted_option_->positive_int_value(),
Expand All @@ -7693,25 +7714,22 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
} else if (uninterpreted_option_->has_negative_int_value()) {
if (uninterpreted_option_->negative_int_value() <
static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
return AddValueError("Value out of range for int32 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
} else {
SetInt32(option_field->number(),
uninterpreted_option_->negative_int_value(),
option_field->type(), unknown_fields);
}
} else {
return AddValueError("Value must be integer for int32 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueMustBeInt<int32_t>("int32", option_field->full_name()));
}
break;

case FieldDescriptor::CPPTYPE_INT64:
if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() >
static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
return AddValueError("Value out of range for int64 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueOutOfRange<int64_t>("int64", option_field->full_name()));
} else {
SetInt64(option_field->number(),
uninterpreted_option_->positive_int_value(),
Expand All @@ -7722,27 +7740,22 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
uninterpreted_option_->negative_int_value(),
option_field->type(), unknown_fields);
} else {
return AddValueError("Value must be integer for int64 option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueMustBeInt<int64_t>("int64", option_field->full_name()));
}
break;

case FieldDescriptor::CPPTYPE_UINT32:
if (uninterpreted_option_->has_positive_int_value()) {
if (uninterpreted_option_->positive_int_value() >
std::numeric_limits<uint32_t>::max()) {
return AddValueError("Value out of range for uint32 option \"" +
option_field->name() + "\".");
return AddValueError(ValueOutOfRange<uint32_t>("uint32", option_field->full_name()));
} else {
SetUInt32(option_field->number(),
uninterpreted_option_->positive_int_value(),
option_field->type(), unknown_fields);
}
} else {
return AddValueError(
"Value must be non-negative integer for uint32 "
"option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueMustBeInt<uint32_t>("uint32", option_field->full_name()));
}
break;

Expand All @@ -7752,10 +7765,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
uninterpreted_option_->positive_int_value(),
option_field->type(), unknown_fields);
} else {
return AddValueError(
"Value must be non-negative integer for uint64 "
"option \"" +
option_field->full_name() + "\".");
return AddValueError(ValueMustBeInt<uint64_t>("uint64", option_field->full_name()));
}
break;

Expand Down
16 changes: 8 additions & 8 deletions src/google/protobuf/descriptor_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5557,7 +5557,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfPositiveRange) {
" positive_int_value: 0x80000000 } "
"}",

"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
"for int32 option \"foo\".\n");
}

Expand All @@ -5574,7 +5574,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfNegativeRange) {
" negative_int_value: -0x80000001 } "
"}",

"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
"for int32 option \"foo\".\n");
}

Expand All @@ -5590,7 +5590,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueIsNotPositiveInt) {
" is_extension: true } "
" string_value: \"5\" } }",

"foo.proto: foo.proto: OPTION_VALUE: Value must be integer "
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -2147483648 to 2147483647, "
"for int32 option \"foo\".\n");
}

Expand All @@ -5608,7 +5608,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueOutOfRange) {
"} "
"}",

"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -9223372036854775808 to 9223372036854775807, "
"for int64 option \"foo\".\n");
}

Expand All @@ -5624,7 +5624,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueIsNotPositiveInt) {
" is_extension: true } "
" identifier_value: \"5\" } }",

"foo.proto: foo.proto: OPTION_VALUE: Value must be integer "
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -9223372036854775808 to 9223372036854775807, "
"for int64 option \"foo\".\n");
}

Expand All @@ -5640,7 +5640,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueOutOfRange) {
" is_extension: true } "
" positive_int_value: 0x100000000 } }",

"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, 0 to 4294967295, "
"for uint32 option \"foo\".\n");
}

Expand All @@ -5656,7 +5656,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueIsNotPositiveInt) {
" is_extension: true } "
" double_value: -5.6 } }",

"foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer "
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 4294967295, "
"for uint32 option \"foo\".\n");
}

Expand All @@ -5672,7 +5672,7 @@ TEST_F(ValidationErrorTest, UInt64OptionValueIsNotPositiveInt) {
" is_extension: true } "
" negative_int_value: -5 } }",

"foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer "
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 18446744073709551615, "
"for uint64 option \"foo\".\n");
}

Expand Down
20 changes: 13 additions & 7 deletions src/google/protobuf/io/tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1002,9 +1002,20 @@ bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value,
}

double Tokenizer::ParseFloat(const std::string& text) {
double result = 0;
if (!TryParseFloat(text, &result)) {
GOOGLE_LOG(DFATAL)
<< " Tokenizer::ParseFloat() passed text that could not have been"
" tokenized as a float: "
<< absl::CEscape(text);
}
return result;
}

bool Tokenizer::TryParseFloat(const std::string& text, double* result) {
const char* start = text.c_str();
char* end;
double result = NoLocaleStrtod(start, &end);
*result = NoLocaleStrtod(start, &end);

// "1e" is not a valid float, but if the tokenizer reads it, it will
// report an error but still return it as a valid token. We need to
Expand All @@ -1020,12 +1031,7 @@ double Tokenizer::ParseFloat(const std::string& text) {
++end;
}

GOOGLE_LOG_IF(DFATAL,
static_cast<size_t>(end - start) != text.size() || *start == '-')
<< " Tokenizer::ParseFloat() passed text that could not have been"
" tokenized as a float: "
<< absl::CEscape(text);
return result;
return static_cast<size_t>(end - start) == text.size() && *start != '-';
}

// Helper to append a Unicode code point to a string as UTF8, without bringing
Expand Down
4 changes: 4 additions & 0 deletions src/google/protobuf/io/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ class PROTOBUF_EXPORT Tokenizer {
// result is undefined (possibly an assert failure).
static double ParseFloat(const std::string& text);

// Parses given text as if it were a TYPE_FLOAT token. Returns false if the
// given text is not actually a valid float literal.
static bool TryParseFloat(const std::string& text, double* result);

// Parses a TYPE_STRING token. This never fails, so long as the text actually
// comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the
// result is undefined (possibly an assert failure).
Expand Down

0 comments on commit c4644b7

Please sign in to comment.