From 27d42c5ba7ab65956e71b9d825f68ef84c38bbde Mon Sep 17 00:00:00 2001 From: Mike Kruskal Date: Wed, 23 Aug 2023 09:24:17 -0700 Subject: [PATCH] Fix a bug that strips options from descriptor.proto in Python. This fixes Python/C++ and upb, and pushes the buggy behavior to pure python. There, it's very difficult to handle options on the bootstrapped proto with the current architecture. Future changes will attempt to address this more isolated issue. PiperOrigin-RevId: 559450900 --- python/google/protobuf/descriptor.py | 73 +++++++++++++------ .../protobuf/internal/reflection_test.py | 58 +++++++++++++++ .../protobuf/compiler/python/generator.cc | 2 +- 3 files changed, 110 insertions(+), 23 deletions(-) diff --git a/python/google/protobuf/descriptor.py b/python/google/protobuf/descriptor.py index fcb87cab5581..fb9632b539d6 100755 --- a/python/google/protobuf/descriptor.py +++ b/python/google/protobuf/descriptor.py @@ -115,6 +115,16 @@ def _Deprecated(name): _internal_create_key = object() +def _IsDescriptorBootstrapProto(file): + """Checks if the file descriptor corresponds to our bootstrapped descriptor.proto""" + if file is None: + return False + return ( + file.name == 'net/proto2/proto/descriptor.proto' + or file.name == 'google/protobuf/descriptor.proto' + ) + + class DescriptorBase(metaclass=DescriptorMetaclass): """Descriptors base class. @@ -123,11 +133,12 @@ class DescriptorBase(metaclass=DescriptorMetaclass): related functionality. Attributes: - has_options: True if the descriptor has non-default options. Usually it - is not necessary to read this -- just call GetOptions() which will - happily return the default instance. However, it's sometimes useful - for efficiency, and also useful inside the protobuf implementation to - avoid some bootstrapping issues. + has_options: True if the descriptor has non-default options. Usually it is + not necessary to read this -- just call GetOptions() which will happily + return the default instance. However, it's sometimes useful for + efficiency, and also useful inside the protobuf implementation to avoid + some bootstrapping issues. + file (FileDescriptor): Reference to file info. """ if _USE_C_DESCRIPTORS: @@ -135,17 +146,22 @@ class DescriptorBase(metaclass=DescriptorMetaclass): # subclasses" of this descriptor class. _C_DESCRIPTOR_CLASS = () - def __init__(self, options, serialized_options, options_class_name): + def __init__(self, file, options, serialized_options, options_class_name): """Initialize the descriptor given its options message and the name of the class of the options message. The name of the class is required in case the options message is None and has to be created. """ + self.file = file self._options = options self._options_class_name = options_class_name - self._serialized_options = serialized_options + self._serialized_options = ( + serialized_options if not _IsDescriptorBootstrapProto(file) else None + ) # Does this descriptor have non-default options? - self.has_options = (options is not None) or (serialized_options is not None) + self.has_options = (self._options is not None) or ( + self._serialized_options is not None + ) def _SetOptions(self, options, options_class_name): """Sets the descriptor's options @@ -195,14 +211,12 @@ def __init__(self, options, options_class_name, name, full_name, """Constructor. Args: - options: Protocol message options or None - to use default message options. + options: Protocol message options or None to use default message options. options_class_name (str): The class name of the above options. name (str): Name of this protocol message type. - full_name (str): Fully-qualified name of this protocol message type, - which will include protocol "package" name and the name of any - enclosing types. - file (FileDescriptor): Reference to file info. + full_name (str): Fully-qualified name of this protocol message type, which + will include protocol "package" name and the name of any enclosing + types. containing_type: if provided, this is a nested descriptor, with this descriptor as parent, otherwise None. serialized_start: The start index (inclusive) in block in the @@ -212,13 +226,13 @@ def __init__(self, options, options_class_name, name, full_name, serialized_options: Protocol message serialized options or None. """ super(_NestedDescriptorBase, self).__init__( - options, serialized_options, options_class_name) + file, options, serialized_options, options_class_name + ) self.name = name # TODO(falk): Add function to calculate full_name instead of having it in # memory? self.full_name = full_name - self.file = file self.containing_type = containing_type self._serialized_start = serialized_start @@ -581,10 +595,10 @@ def __init__(self, name, full_name, index, number, type, cpp_type, label, _Deprecated('FieldDescriptor') super(FieldDescriptor, self).__init__( - options, serialized_options, 'FieldOptions') + file, options, serialized_options, 'FieldOptions' + ) self.name = name self.full_name = full_name - self.file = file self._camelcase_name = None if json_name is None: self.json_name = _ToJsonName(name) @@ -732,6 +746,7 @@ def __init__(self, name, full_name, filename, values, self.values = values for value in self.values: + value.file = file value.type = self self.values_by_name = dict((v.name, v) for v in values) # Values are reversed to ensure that the first alias is retained. @@ -808,7 +823,11 @@ def __init__(self, name, index, number, _Deprecated('EnumValueDescriptor') super(EnumValueDescriptor, self).__init__( - options, serialized_options, 'EnumValueOptions') + type.file if type else None, + options, + serialized_options, + 'EnumValueOptions', + ) self.name = name self.index = index self.number = number @@ -847,7 +866,11 @@ def __init__( _Deprecated('OneofDescriptor') super(OneofDescriptor, self).__init__( - options, serialized_options, 'OneofOptions') + containing_type.file if containing_type else None, + options, + serialized_options, + 'OneofOptions', + ) self.name = name self.full_name = full_name self.index = index @@ -907,6 +930,7 @@ def __init__(self, name, full_name, index, methods, options=None, self.methods_by_name = dict((m.name, m) for m in methods) # Set the containing service for each method in this service. for method in self.methods: + method.file = self.file method.containing_service = self def FindMethodByName(self, name): @@ -992,7 +1016,11 @@ def __init__(self, _Deprecated('MethodDescriptor') super(MethodDescriptor, self).__init__( - options, serialized_options, 'MethodOptions') + containing_service.file if containing_service else None, + options, + serialized_options, + 'MethodOptions', + ) self.name = name self.full_name = full_name self.index = index @@ -1076,7 +1104,8 @@ def __init__(self, name, package, options=None, _Deprecated('FileDescriptor') super(FileDescriptor, self).__init__( - options, serialized_options, 'FileOptions') + None, options, serialized_options, 'FileOptions' + ) if pool is None: from google.protobuf import descriptor_pool diff --git a/python/google/protobuf/internal/reflection_test.py b/python/google/protobuf/internal/reflection_test.py index 0708d51e6f6a..e038927b6847 100755 --- a/python/google/protobuf/internal/reflection_test.py +++ b/python/google/protobuf/internal/reflection_test.py @@ -2053,6 +2053,64 @@ def testFileDescriptorErrors(self): # dependency on the C++ logging code. self.assertIn('test_file_descriptor_errors.msg1', str(cm.exception)) + @unittest.skipIf( + api_implementation.Type() == 'python', + 'Options are not supported on descriptor.proto in pure python' + ' (b/296476238).', + ) + def testDescriptorProtoHasFileOptions(self): + self.assertTrue(descriptor_pb2.DESCRIPTOR.has_options) + self.assertEqual( + descriptor_pb2.DESCRIPTOR.GetOptions().java_package, + 'com.google.protobuf', + ) + + @unittest.skipIf( + api_implementation.Type() == 'python', + 'Options are not supported on descriptor.proto in pure python' + ' (b/296476238).', + ) + def testDescriptorProtoHasFieldOptions(self): + self.assertTrue(descriptor_pb2.DESCRIPTOR.has_options) + self.assertEqual( + descriptor_pb2.DESCRIPTOR.GetOptions().java_package, + 'com.google.protobuf', + ) + packed_desc = ( + descriptor_pb2.SourceCodeInfo.DESCRIPTOR.nested_types_by_name.get( + 'Location' + ).fields_by_name.get('path') + ) + self.assertTrue(packed_desc.has_options) + self.assertTrue(packed_desc.GetOptions().packed) + + @unittest.skipIf( + api_implementation.Type() == 'python', + 'Options are not supported on descriptor.proto in pure python' + ' (b/296476238).', + ) + def testDescriptorProtoHasFeatureOptions(self): + self.assertTrue(descriptor_pb2.DESCRIPTOR.has_options) + self.assertEqual( + descriptor_pb2.DESCRIPTOR.GetOptions().java_package, + 'com.google.protobuf', + ) + presence_desc = descriptor_pb2.FeatureSet.DESCRIPTOR.fields_by_name.get( + 'field_presence' + ) + self.assertTrue(presence_desc.has_options) + self.assertEqual( + presence_desc.GetOptions().retention, + descriptor_pb2.FieldOptions.OptionRetention.RETENTION_RUNTIME, + ) + self.assertListsEqual( + presence_desc.GetOptions().targets, + [ + descriptor_pb2.FieldOptions.OptionTargetType.TARGET_TYPE_FIELD, + descriptor_pb2.FieldOptions.OptionTargetType.TARGET_TYPE_FILE, + ], + ) + def testStringUTF8Serialization(self): proto = message_set_extensions_pb2.TestMessageSet() extension_message = message_set_extensions_pb2.TestMessageSetExtension2 diff --git a/src/google/protobuf/compiler/python/generator.cc b/src/google/protobuf/compiler/python/generator.cc index 5d2fd4aeb156..c90cc8f0912e 100644 --- a/src/google/protobuf/compiler/python/generator.cc +++ b/src/google/protobuf/compiler/python/generator.cc @@ -1032,7 +1032,7 @@ void Generator::PrintEnumValueDescriptor( // Returns a CEscaped string of serialized_options. std::string Generator::OptionsValue( absl::string_view serialized_options) const { - if (serialized_options.length() == 0 || GeneratingDescriptorProto()) { + if (serialized_options.length() == 0) { return "None"; } else { return absl::StrCat("b'", absl::CEscape(serialized_options), "'");