Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add C# deterministic serialization #13160

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,59 @@ public void IDictionaryValues_Equals_IReadOnlyDictionaryValues()
CollectionAssert.AreEquivalent(((IDictionary<string, string>)map).Values, ((IReadOnlyDictionary<string, string>)map).Values);
}

[Test]
public void SortIntKeys_RandomOrder()
{
    // Insert the keys out of numeric order on purpose.
    var unsorted = new MapField<int, string>();
    unsorted.Add(1, "val");
    unsorted.Add(-1, "val");
    unsorted.Add(0, "val");

    int[] expectedKeys = { -1, 0, 1 };
    var actualKeys = unsorted
        .GetSortedListCopy(unsorted.ToList())
        .ToList()
        .Select(entry => entry.Key);

    // The sorted copy must list the keys in ascending numeric order.
    CollectionAssert.AreEqual(expectedKeys, actualKeys);
}

[Test]
public void SortIntKeys_Empty()
{
    // Sorting a map with no entries must yield no keys.
    var emptyMap = new MapField<int, string>();

    var actualKeys = emptyMap
        .GetSortedListCopy(emptyMap.ToList())
        .ToList()
        .Select(entry => entry.Key);

    Assert.IsEmpty(actualKeys);
}

[Test]
public void SortStringKeys_RandomOrder()
{
    // Insert the keys out of alphabetical order on purpose.
    var unsorted = new MapField<string, string>();
    unsorted.Add("a", "val");
    unsorted.Add("c", "val");
    unsorted.Add("b", "val");

    string[] expectedKeys = { "a", "b", "c" };
    var actualKeys = unsorted
        .GetSortedListCopy(unsorted.ToList())
        .ToList()
        .Select(entry => entry.Key);

    CollectionAssert.AreEqual(expectedKeys, actualKeys);
}

[Test]
public void SortStringKeys_EnsureOrdinalSort()
{
    // Four variants of the letter "i" whose relative order depends on the comparison used.
    var unsorted = new MapField<string, string>();
    unsorted.Add("i", "val");
    unsorted.Add("I", "val");
    unsorted.Add("ı", "val");
    unsorted.Add("İ", "val");

    var actualKeys = unsorted
        .GetSortedListCopy(unsorted.ToList())
        .Select(entry => entry.Key);

    // Ordinal order follows the code unit values:
    //   I == 0x49 , i == 0x69 , İ == 0x130 , ı == 0x131
    // so the expected sequence is I, i, İ, ı.
    // (A non-ordinal sort returns i, I, İ, ı instead.)
    string[] expectedKeys = { "I", "i", "İ", "ı" };
    CollectionAssert.AreEqual(expectedKeys, actualKeys);
}

[Test]
public void SortBoolKeys()
{
    // Insert true before false so the sorted copy has to reorder them.
    var unsorted = new MapField<bool, string>();
    unsorted.Add(true, "val");
    unsorted.Add(false, "val");

    var actualKeys = unsorted
        .GetSortedListCopy(unsorted.ToList())
        .Select(entry => entry.Key);

    // false sorts before true.
    bool[] expectedKeys = { false, true };
    CollectionAssert.AreEqual(expectedKeys, actualKeys);
}

private static KeyValuePair<TKey, TValue> NewKeyValuePair<TKey, TValue>(TKey key, TValue value)
{
return new KeyValuePair<TKey, TValue>(key, value);
Expand Down
75 changes: 74 additions & 1 deletion csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ public void MapIgnoresExtraFieldsWithinEntryMessages()

output.WriteTag(TestMap.MapInt32Int32FieldNumber, WireFormat.WireType.LengthDelimited);

var key = 10; // Field 1
var key = 10; // Field 1
var value = 20; // Field 2
var extra = 30; // Field 3

Expand Down Expand Up @@ -691,6 +691,79 @@ public void OneofSerialization_DefaultValue()
});
}

[Test]
public void MapStringString_DeterministicTrue_ThenBytesIdentical()
{
    // Define three strings consisting of different versions of the letter I.
    // LATIN CAPITAL LETTER I (U+0049)
    string capitalLetterI = "I";
    // LATIN SMALL LETTER I (U+0069)
    string smallLetterI = "i";
    // LATIN SMALL LETTER DOTLESS I (U+0131)
    string smallLetterDotlessI = "\u0131";

    // First map: entries added in descending code-point order.
    var testMap1 = new TestMap();
    testMap1.MapStringString.Add(smallLetterDotlessI, "value_" + smallLetterDotlessI);
    testMap1.MapStringString.Add(smallLetterI, "value_" + smallLetterI);
    testMap1.MapStringString.Add(capitalLetterI, "content_" + capitalLetterI);
    var bytes1 = SerializeTestMap(testMap1, true);

    // Second map: the same entries added in the opposite order.
    var testMap2 = new TestMap();
    testMap2.MapStringString.Add(capitalLetterI, "content_" + capitalLetterI);
    testMap2.MapStringString.Add(smallLetterI, "value_" + smallLetterI);
    testMap2.MapStringString.Add(smallLetterDotlessI, "value_" + smallLetterDotlessI);
    var bytes2 = SerializeTestMap(testMap2, true);

    // The deterministic payloads must still parse back to the messages they came from.
    Assert.AreEqual(testMap1, TestMap.Parser.ParseFrom(bytes1));
    Assert.AreEqual(testMap2, TestMap.Parser.ParseFrom(bytes2));

    // With Deterministic enabled, insertion order must not affect the serialized bytes.
    CollectionAssert.AreEqual(bytes1, bytes2);
}

[Test]
public void MapInt32Bytes_DeterministicTrue_ThenBytesIdentical()
{
    // Two maps with the same entries, added in opposite key order.
    var ascending = new TestMap();
    ascending.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
    ascending.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
    byte[] ascendingBytes = SerializeTestMap(ascending, true);

    var descending = new TestMap();
    descending.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
    descending.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
    byte[] descendingBytes = SerializeTestMap(descending, true);

    // Deterministic output must not depend on insertion order.
    bool identical = ascendingBytes.SequenceEqual(descendingBytes);
    Assert.IsTrue(identical);
}

[Test]
public void MapInt32Bytes_DeterministicFalse_ThenBytesDifferent()
{
    // Two maps with the same entries, added in opposite key order.
    var ascending = new TestMap();
    ascending.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
    ascending.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
    byte[] ascendingBytes = SerializeTestMap(ascending, false);

    var descending = new TestMap();
    descending.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
    descending.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
    byte[] descendingBytes = SerializeTestMap(descending, false);

    // With Deterministic off, the test expects the two payloads to differ.
    bool identical = ascendingBytes.SequenceEqual(descendingBytes);
    Assert.IsFalse(identical);
}

/// <summary>
/// Serializes <paramref name="testMap"/> through a <see cref="CodedOutputStream"/> whose
/// <c>Deterministic</c> flag is set to <paramref name="deterministic"/>.
/// </summary>
/// <param name="testMap">The message to serialize.</param>
/// <param name="deterministic">Value assigned to the output stream's <c>Deterministic</c> property.</param>
/// <returns>The bytes written to the backing memory stream.</returns>
private byte[] SerializeTestMap(TestMap testMap, bool deterministic)
{
    using var memoryStream = new MemoryStream();
    var codedOutputStream = new CodedOutputStream(memoryStream)
    {
        Deterministic = deterministic
    };

    testMap.WriteTo(codedOutputStream);
    // Flush before reading the result so nothing is left pending in the coded stream.
    codedOutputStream.Flush();

    // MemoryStream.ToArray returns the full contents regardless of Position,
    // so there is no need to rewind the stream first.
    return memoryStream.ToArray();
}

[Test]
public void DiscardUnknownFields_RealDataStillRead()
{
Expand Down
29 changes: 28 additions & 1 deletion csharp/src/Google.Protobuf/CodedOutputStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,33 @@ public long Position
}
}

/// <summary>
/// Configures whether or not serialization is deterministic.
/// </summary>
/// <remarks>
/// Deterministic serialization guarantees that for a given binary, equal messages (defined by the
/// equals methods in protos) will always be serialized to the same bytes. This implies:
/// <list type="bullet">
/// <item><description>Repeated serialization of a message will return the same bytes.</description></item>
/// <item><description>Different processes of the same binary (which may be executing on different machines)
/// will serialize equal messages to the same bytes.</description></item>
/// </list>
/// Note that deterministic serialization is NOT canonical across languages; it is also unstable
/// across different builds with schema changes due to unknown fields. Users who need canonical
/// serialization, e.g. persistent storage in a canonical form, fingerprinting, etc., should define
/// their own canonicalization specification and implement the serializer using reflection APIs
/// rather than relying on this API.
/// Once set, the serializer will do the following (note that this is an implementation detail and
/// may be subject to change in the future):
/// <list type="bullet">
/// <item><description>Sort map entries by keys in lexicographical order or numerical order. Note: for string
/// keys, the order is based on comparing the UTF-16 code unit value of each character in the strings.
/// The order may be different from the deterministic serialization in other languages, where
/// maps are sorted on the lexicographical order of the UTF-8 encoded keys.</description></item>
/// </list>
/// </remarks>
public bool Deterministic { get; set; }

#region Writing of values (not including tags)

/// <summary>
Expand Down Expand Up @@ -485,7 +512,7 @@ public void WriteRawTag(byte b1, byte b2, byte b3, byte b4, byte b5)
#endregion

#region Underlying writing primitives

/// <summary>
/// Writes a 32 bit value as a varint. The fast route is taken when
/// there's enough buffer space left to whizz through without checking
Expand Down
42 changes: 38 additions & 4 deletions csharp/src/Google.Protobuf/Collections/MapField.cs
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ bool ICollection<KeyValuePair<TKey, TValue>>.Remove(KeyValuePair<TKey, TValue> i
/// Returns a hash code for this instance.
/// </summary>
/// <returns>
/// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
/// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
/// </returns>
public override int GetHashCode()
{
Expand Down Expand Up @@ -452,14 +452,37 @@ public void WriteTo(CodedOutputStream output, Codec codec)
WriteContext.Initialize(output, out WriteContext ctx);
try
{
WriteTo(ref ctx, codec);
IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;

if (output.Deterministic)
{
listToWrite = GetSortedListCopy(list);
}
WriteTo(ref ctx, codec, listToWrite);
}
finally
{
ctx.CopyStateTo(output);
}
}

/// <summary>
/// Returns a new list containing the given entries sorted by key.
/// </summary>
/// <remarks>
/// String keys are compared with <see cref="StringComparer.Ordinal"/>; every other key type
/// is compared with <see cref="Comparer{T}.Default"/>. The input sequence is not modified.
/// </remarks>
/// <param name="listToSort">The entries to copy and sort.</param>
/// <returns>A sorted copy of <paramref name="listToSort"/>.</returns>
internal IEnumerable<KeyValuePair<TKey, TValue>> GetSortedListCopy(IEnumerable<KeyValuePair<TKey, TValue>> listToSort)
{
    // We can't sort the list in place, as that would invalidate the linked list.
    // Instead, we create a new list, sort that, and then write it out.
    var listToWrite = new List<KeyValuePair<TKey, TValue>>(listToSort);

    // The key type is fixed for this instance, so pick the comparison once
    // rather than re-testing typeof(TKey) on every comparison during the sort.
    if (typeof(TKey) == typeof(string))
    {
        // Use Ordinal, otherwise Comparer<string>.Default uses StringComparer.CurrentCulture
        var ordinal = StringComparer.Ordinal;
        listToWrite.Sort((pair1, pair2) => ordinal.Compare(pair1.Key.ToString(), pair2.Key.ToString()));
    }
    else
    {
        var keyComparer = Comparer<TKey>.Default;
        listToWrite.Sort((pair1, pair2) => keyComparer.Compare(pair1.Key, pair2.Key));
    }

    return listToWrite;
}

/// <summary>
/// Writes the contents of this map to the given write context, using the specified codec
/// to encode each entry.
Expand All @@ -469,7 +492,18 @@ public void WriteTo(CodedOutputStream output, Codec codec)
[SecuritySafeCritical]
public void WriteTo(ref WriteContext ctx, Codec codec)
{
// NOTE(review): the next line appears to be the removed (pre-change) loop header shown by the
// diff view; the statements that follow it are its replacement.
foreach (var entry in list)
// By default the entries are written in the order held by the backing list.
IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;
// A null CodedOutputStream on the context is treated the same as Deterministic == false.
if (ctx.state.CodedOutputStream?.Deterministic ?? false)
{
// Deterministic output: write from a copy of the entries sorted by key.
listToWrite = GetSortedListCopy(list);
}
// Delegate the actual entry writing to the overload that takes the sequence to write.
WriteTo(ref ctx, codec, listToWrite);
}

[SecuritySafeCritical]
private void WriteTo(ref WriteContext ctx, Codec codec, IEnumerable<KeyValuePair<TKey, TValue>> listKvp)
{
foreach (var entry in listKvp)
{
ctx.WriteTag(codec.MapTag);

Expand Down Expand Up @@ -651,7 +685,7 @@ internal MapView(
this.containsCheck = containsCheck;
}

public int Count => parent.Count;
public int Count => parent.Count;

public bool IsReadOnly => true;

Expand Down
Loading