Skip to content

Commit

Permalink
Merge pull request #10581 from jhump/jh/custom-json-name-validation
Browse files Browse the repository at this point in the history
protoc: validate custom json_name configuration
  • Loading branch information
mcy authored Sep 22, 2022
2 parents bcd1755 + c7ba055 commit d3b5389
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 71 deletions.
108 changes: 101 additions & 7 deletions src/google/protobuf/compiler/parser_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2069,13 +2069,107 @@ TEST_F(ParserValidationErrorTest, Proto3Default) {

TEST_F(ParserValidationErrorTest, Proto3JsonConflictError) {
ExpectHasValidationErrors(
"syntax = 'proto3';\n"
"message TestMessage {\n"
" uint32 foo = 1;\n"
" uint32 Foo = 2;\n"
"}\n",
"3:9: The JSON camel-case name of field \"Foo\" conflicts with field "
"\"foo\". This is not allowed in proto3.\n");
R"pb(
syntax = 'proto3';
message TestMessage {
uint32 foo = 1;
uint32 Foo = 2;
}
)pb",
"4:15: The default JSON name of field \"Foo\" (\"Foo\") conflicts "
"with the default JSON name of field \"foo\" (\"foo\"). "
"This is not allowed in proto3.\n");
}

TEST_F(ParserValidationErrorTest, Proto2JsonConflictError) {
// conflicts with default JSON names are not errors in proto2
ExpectParsesTo(
R"pb(
syntax = "proto2";
message TestMessage {
optional uint32 foo = 1;
optional uint32 Foo = 2;
}
)pb",
R"pb(
syntax: 'proto2'
message_type {
name: 'TestMessage'
field {
label: LABEL_OPTIONAL type: TYPE_UINT32 name: 'foo' number: 1
}
field {
label: LABEL_OPTIONAL type: TYPE_UINT32 name: 'Foo' number: 2
}
}
)pb"
);
}

TEST_F(ParserValidationErrorTest, Proto3CustomJsonConflictWithDefaultError) {
ExpectHasValidationErrors(
R"pb(
syntax = 'proto3';
message TestMessage {
uint32 foo = 1 [json_name='bar'];
uint32 bar = 2;
}
)pb",
"4:15: The default JSON name of field \"bar\" (\"bar\") conflicts "
"with the custom JSON name of field \"foo\". "
"This is not allowed in proto3.\n");
}

TEST_F(ParserValidationErrorTest, Proto2CustomJsonConflictWithDefaultError) {
// conflicts with default JSON names are not errors in proto2
ExpectParsesTo(
R"pb(
syntax = 'proto2';
message TestMessage {
optional uint32 foo = 1 [json_name='bar'];
optional uint32 bar = 2;
}
)pb",
R"pb(
syntax: 'proto2'
message_type {
name: 'TestMessage'
field {
label: LABEL_OPTIONAL type: TYPE_UINT32 name: 'foo' number: 1 json_name: 'bar'
}
field {
label: LABEL_OPTIONAL type: TYPE_UINT32 name: 'bar' number: 2
}
}
)pb"
);
}

TEST_F(ParserValidationErrorTest, Proto3CustomJsonConflictError) {
ExpectHasValidationErrors(
R"pb(
syntax = 'proto3';
message TestMessage {
uint32 foo = 1 [json_name='baz'];
uint32 bar = 2 [json_name='baz'];
}
)pb",
"4:15: The custom JSON name of field \"bar\" (\"baz\") conflicts "
"with the custom JSON name of field \"foo\".\n");
}

TEST_F(ParserValidationErrorTest, Proto2CustomJsonConflictError) {
ExpectHasValidationErrors(
R"pb(
syntax = 'proto2';
message TestMessage {
optional uint32 foo = 1 [json_name='baz'];
optional uint32 bar = 2 [json_name='baz'];
}
)pb",
// fails in proto2 also: can't explicitly configure bad custom JSON names
"4:24: The custom JSON name of field \"bar\" (\"baz\") conflicts "
"with the custom JSON name of field \"foo\".\n");
}

TEST_F(ParserValidationErrorTest, EnumNameError) {
Expand Down
143 changes: 98 additions & 45 deletions src/google/protobuf/descriptor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3856,8 +3856,6 @@ class DescriptorBuilder {
internal::FlatAllocator& alloc);
void BuildOneof(const OneofDescriptorProto& proto, Descriptor* parent,
OneofDescriptor* result, internal::FlatAllocator& alloc);
void CheckEnumValueUniqueness(const EnumDescriptorProto& proto,
const EnumDescriptor* result);
void BuildEnum(const EnumDescriptorProto& proto, const Descriptor* parent,
EnumDescriptor* result, internal::FlatAllocator& alloc);
void BuildEnumValue(const EnumValueDescriptorProto& proto,
Expand All @@ -3869,6 +3867,15 @@ class DescriptorBuilder {
const ServiceDescriptor* parent, MethodDescriptor* result,
internal::FlatAllocator& alloc);

void CheckFieldJsonNameUniqueness(const DescriptorProto& proto,
const Descriptor* result);
void CheckFieldJsonNameUniqueness(const std::string& message_name,
const DescriptorProto& message,
FileDescriptor::Syntax syntax,
bool use_custom_names);
void CheckEnumValueUniqueness(const EnumDescriptorProto& proto,
const EnumDescriptor* result);

void LogUnusedDependency(const FileDescriptorProto& proto,
const FileDescriptor* result);

Expand Down Expand Up @@ -5424,7 +5431,10 @@ void DescriptorBuilder::BuildMessage(const DescriptorProto& proto,
}
}

CheckFieldJsonNameUniqueness(proto, result);

// Check that fields aren't using reserved names or numbers and that they
// aren't using extension numbers.
for (int i = 0; i < result->field_count(); i++) {
const FieldDescriptor* field = result->field(i);
for (int j = 0; j < result->extension_range_count(); j++) {
Expand Down Expand Up @@ -5489,6 +5499,84 @@ void DescriptorBuilder::BuildMessage(const DescriptorProto& proto,
}
}

void DescriptorBuilder::CheckFieldJsonNameUniqueness(
const DescriptorProto& proto, const Descriptor* result) {
FileDescriptor::Syntax syntax = result->file()->syntax();
std::string message_name = result->full_name();
// two passes: one looking only at default JSON names, and one that considers
// custom JSON names
CheckFieldJsonNameUniqueness(message_name, proto, syntax, false);
CheckFieldJsonNameUniqueness(message_name, proto, syntax, true);
}

namespace {
// Helpers for function below

struct JsonNameDetails {
const FieldDescriptorProto* field;
std::string orig_name;
bool is_custom;
};

JsonNameDetails GetJsonNameDetails(const FieldDescriptorProto* field, bool use_custom) {
if (use_custom && field->has_json_name()) {
return {field, field->json_name(), true};
}
return {field, ToJsonName(field->name()), false};
}


} // namespace

void DescriptorBuilder::CheckFieldJsonNameUniqueness(
const std::string& message_name, const DescriptorProto& message,
FileDescriptor::Syntax syntax, bool use_custom_names) {

absl::flat_hash_map<std::string, JsonNameDetails> name_to_field;
for (const FieldDescriptorProto& field : message.field()) {
JsonNameDetails details = GetJsonNameDetails(&field, use_custom_names);
std::string lowercase_name = absl::AsciiStrToLower(details.orig_name);
auto existing = name_to_field.find(lowercase_name);
if (existing == name_to_field.end()) {
name_to_field[lowercase_name] = details;
continue;
}
JsonNameDetails& match = existing->second;
if (use_custom_names && !details.is_custom && !match.is_custom) {
// if this pass is considering custom JSON names, but neither of the
// names involved in the conflict are custom, don't bother with a
// message. That will have been reported from other pass (non-custom
// JSON names).
continue;
}
absl::string_view this_type = details.is_custom ? "custom" : "default";
absl::string_view existing_type = match.is_custom ? "custom" : "default";
// If the matched name differs (which it can only differ in case), include
// it in the error message, for maximum clarity to user.
absl::string_view name_suffix = "";
if (details.orig_name != match.orig_name) {
name_suffix = absl::StrCat(" (\"", match.orig_name, "\")");
}
std::string error_message =
absl::StrFormat("The %s JSON name of field \"%s\" (\"%s\") conflicts "
"with the %s JSON name of field \"%s\"%s.",
this_type, field.name(), details.orig_name, existing_type,
match.field->name(), name_suffix);

bool involves_default = !details.is_custom || !match.is_custom;
if (syntax == FileDescriptor::SYNTAX_PROTO2 && involves_default) {
AddWarning(message_name, field, DescriptorPool::ErrorCollector::NAME,
error_message);
} else {
if (involves_default) {
absl::StrAppend(&error_message, " This is not allowed in proto3.");
}
AddError(message_name, field, DescriptorPool::ErrorCollector::NAME,
error_message);
}
}
}

void DescriptorBuilder::BuildFieldOrExtension(const FieldDescriptorProto& proto,
Descriptor* parent,
FieldDescriptor* result,
Expand Down Expand Up @@ -5892,13 +5980,12 @@ void DescriptorBuilder::CheckEnumValueUniqueness(
// NAME_TYPE_LAST_NAME = 2,
// }
PrefixRemover remover(result->name());
std::map<std::string, const EnumValueDescriptor*> values;
absl::flat_hash_map<std::string, const EnumValueDescriptor*> values;
for (int i = 0; i < result->value_count(); i++) {
const EnumValueDescriptor* value = result->value(i);
std::string stripped =
EnumValueToPascalCase(remover.MaybeRemove(value->name()));
std::pair<std::map<std::string, const EnumValueDescriptor*>::iterator, bool>
insert_result = values.insert(std::make_pair(stripped, value));
auto insert_result = values.insert(std::make_pair(stripped, value));
bool inserted = insert_result.second;

// We don't throw the error if the two conflicting symbols are identical, or
Expand All @@ -5909,19 +5996,18 @@ void DescriptorBuilder::CheckEnumValueUniqueness(
// stripping should de-dup the labels in this case).
if (!inserted && insert_result.first->second->name() != value->name() &&
insert_result.first->second->number() != value->number()) {
std::string error_message =
"Enum name " + value->name() + " has the same name as " +
values[stripped]->name() +
" if you ignore case and strip out the enum name prefix (if any). "
"This is error-prone and can lead to undefined behavior. "
"Please avoid doing this. If you are using allow_alias, please "
"assign the same numeric value to both enums.";
std::string error_message = absl::StrFormat(
"Enum name %s has the same name as %s if you ignore case and strip "
"out the enum name prefix (if any). (If you are using allow_alias, "
"please assign the same numeric value to both enums.)",
value->name(), values[stripped]->name());
// There are proto2 enums out there with conflicting names, so to preserve
// compatibility we issue only a warning for proto2.
if (result->file()->syntax() == FileDescriptor::SYNTAX_PROTO2) {
AddWarning(value->full_name(), proto.value(i),
DescriptorPool::ErrorCollector::NAME, error_message);
} else {
absl::StrAppend(&error_message, " This is not allowed in proto3.");
AddError(value->full_name(), proto.value(i),
DescriptorPool::ErrorCollector::NAME, error_message);
}
Expand Down Expand Up @@ -6788,20 +6874,6 @@ void DescriptorBuilder::ValidateProto3(FileDescriptor* file,
}
}

static std::string ToLowercaseWithoutUnderscores(const std::string& name) {
std::string result;
for (char character : name) {
if (character != '_') {
if (character >= 'A' && character <= 'Z') {
result.push_back(character - 'A' + 'a');
} else {
result.push_back(character);
}
}
}
return result;
}

void DescriptorBuilder::ValidateProto3Message(Descriptor* message,
const DescriptorProto& proto) {
for (int i = 0; i < message->nested_type_count(); ++i) {
Expand All @@ -6826,25 +6898,6 @@ void DescriptorBuilder::ValidateProto3Message(Descriptor* message,
AddError(message->full_name(), proto, DescriptorPool::ErrorCollector::NAME,
"MessageSet is not supported in proto3.");
}

// In proto3, we reject field names if they conflict in camelCase.
// Note that we currently enforce a stricter rule: Field names must be
// unique after being converted to lowercase with underscores removed.
std::map<std::string, const FieldDescriptor*> name_to_field;
for (int i = 0; i < message->field_count(); ++i) {
std::string lowercase_name =
ToLowercaseWithoutUnderscores(message->field(i)->name());
if (name_to_field.find(lowercase_name) != name_to_field.end()) {
AddError(message->full_name(), proto.field(i),
DescriptorPool::ErrorCollector::NAME,
"The JSON camel-case name of field \"" +
message->field(i)->name() + "\" conflicts with field \"" +
name_to_field[lowercase_name]->name() + "\". This is not " +
"allowed in proto3.");
} else {
name_to_field[lowercase_name] = message->field(i);
}
}
}

void DescriptorBuilder::ValidateProto3Field(FieldDescriptor* field,
Expand Down
Loading

0 comments on commit d3b5389

Please sign in to comment.