Skip to content

Commit

Permalink
add C# deterministic serialization (#13160)
Browse files Browse the repository at this point in the history
#12881

@jskeet  review please :-) as this is my first contrib to protobuf, any design / performance feedback is very sought after

Closes #13160

COPYBARA_INTEGRATE_REVIEW=#13160 from fmg-lydonchandra:feature/12881_cs_serialization_deterministic ab7e01b
PiperOrigin-RevId: 568448399
  • Loading branch information
fmg-lydonchandra authored and copybara-github committed Sep 26, 2023
1 parent 67b9c76 commit 96100bf
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 6 deletions.
53 changes: 53 additions & 0 deletions csharp/src/Google.Protobuf.Test/Collections/MapFieldTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,59 @@ public void IDictionaryValues_Equals_IReadOnlyDictionaryValues()
CollectionAssert.AreEquivalent(((IDictionary<string, string>)map).Values, ((IReadOnlyDictionary<string, string>)map).Values);
}

[Test]
public void SortIntKeys_RandomOrder()
{
var map = new MapField<int, string>() { { 1, "val" }, { -1, "val"}, { 0, "val" } };
var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
var sortedKeys = sortedList.Select(kvp => kvp.Key);
CollectionAssert.AreEqual(new[] { -1, 0, 1 }, sortedKeys);
}

[Test]
public void SortIntKeys_Empty()
{
var map = new MapField<int, string> { };
var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
var sortedKeys = sortedList.Select(kvp => kvp.Key);
Assert.IsEmpty(sortedKeys);
}

[Test]
public void SortStringKeys_RandomOrder()
{
var map = new MapField<string, string> { { "a", "val" }, { "c", "val" }, { "b", "val" } };
var sortedList = map.GetSortedListCopy(map.ToList()).ToList();
var sortedKeys = sortedList.Select(kvp => kvp.Key);
CollectionAssert.AreEqual(new[] { "a", "b", "c" }, sortedKeys);
}

[Test]
public void SortStringKeys_EnsureOrdinalSort()
{
var map = new MapField<string, string>
{
{ "i", "val" } , { "I", "val" }, { "ı", "val" }, { "İ", "val" }
};
var sortedList = map.GetSortedListCopy(map.ToList());
var sortedKeys = sortedList.Select(kvp => kvp.Key);
// Assert Ordinal sort I, i, ı, İ (Non-ordinal sort returns i, I, İ, ı)
// I == 0x49 , i == 0x69 , İ == 0x130 , ı == 0x131
CollectionAssert.AreEqual(new[] { "I", "i", "İ", "ı" }, sortedKeys);
}

[Test]
public void SortBoolKeys()
{
var map = new MapField<bool, string>
{
{ true, "val" } , { false, "val" }
};
var sortedList = map.GetSortedListCopy(map.ToList());
var sortedKeys = sortedList.Select(kvp => kvp.Key);
CollectionAssert.AreEqual(new[] { false, true }, sortedKeys);
}

private static KeyValuePair<TKey, TValue> NewKeyValuePair<TKey, TValue>(TKey key, TValue value)
{
return new KeyValuePair<TKey, TValue>(key, value);
Expand Down
75 changes: 74 additions & 1 deletion csharp/src/Google.Protobuf.Test/GeneratedMessageTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ public void MapIgnoresExtraFieldsWithinEntryMessages()

output.WriteTag(TestMap.MapInt32Int32FieldNumber, WireFormat.WireType.LengthDelimited);

var key = 10; // Field 1
var key = 10; // Field 1
var value = 20; // Field 2
var extra = 30; // Field 3

Expand Down Expand Up @@ -668,6 +668,79 @@ public void OneofSerialization_DefaultValue()
});
}

[Test]
public void MapStringString_DeterministicTrue_ThenBytesIdentical()
{
// Define three strings consisting of different versions of the letter I.
// LATIN CAPITAL LETTER I (U+0049)
string capitalLetterI = "I";
// LATIN SMALL LETTER I (U+0069)
string smallLetterI = "i";
// LATIN SMALL LETTER DOTLESS I (U+0131)
string smallLetterDotlessI = "\u0131";
var testMap1 = new TestMap();

testMap1.MapStringString.Add(smallLetterDotlessI, "value_"+smallLetterDotlessI);
testMap1.MapStringString.Add(smallLetterI, "value_"+smallLetterI);
testMap1.MapStringString.Add(capitalLetterI, "content_"+capitalLetterI);
var bytes1 = SerializeTestMap(testMap1, true);

var testMap2 = new TestMap();
testMap2.MapStringString.Add(capitalLetterI, "content_"+capitalLetterI);
testMap2.MapStringString.Add(smallLetterI, "value_"+smallLetterI);
testMap2.MapStringString.Add(smallLetterDotlessI, "value_"+smallLetterDotlessI);

var bytes2 = SerializeTestMap(testMap2, true);
var parsedBytes2 = TestMap.Parser.ParseFrom(bytes2);
var parsedBytes1 = TestMap.Parser.ParseFrom(bytes1);
Assert.IsTrue(bytes1.SequenceEqual(bytes2));
}

[Test]
public void MapInt32Bytes_DeterministicTrue_ThenBytesIdentical()
{
var testMap1 = new TestMap();
testMap1.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
testMap1.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
var bytes1 = SerializeTestMap(testMap1, true);

var testMap2 = new TestMap();
testMap2.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
testMap2.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
var bytes2 = SerializeTestMap(testMap2, true);

Assert.IsTrue(bytes1.SequenceEqual(bytes2));
}

[Test]
public void MapInt32Bytes_DeterministicFalse_ThenBytesDifferent()
{
var testMap1 = new TestMap();
testMap1.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
testMap1.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
var bytes1 = SerializeTestMap(testMap1, false);

var testMap2 = new TestMap();
testMap2.MapInt32Bytes.Add(2, ByteString.CopyFromUtf8("test2"));
testMap2.MapInt32Bytes.Add(1, ByteString.CopyFromUtf8("test1"));
var bytes2 = SerializeTestMap(testMap2, false);

Assert.IsFalse(bytes1.SequenceEqual(bytes2));
}

private byte[] SerializeTestMap(TestMap testMap, bool deterministic)
{
using var memoryStream = new MemoryStream();
var codedOutputStream = new CodedOutputStream(memoryStream);
codedOutputStream.Deterministic = deterministic;

testMap.WriteTo(codedOutputStream);
codedOutputStream.Flush();

memoryStream.Seek(0, SeekOrigin.Begin);
return memoryStream.ToArray();
}

[Test]
public void DiscardUnknownFields_RealDataStillRead()
{
Expand Down
29 changes: 28 additions & 1 deletion csharp/src/Google.Protobuf/CodedOutputStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,33 @@ public long Position
}
}

/// <summary>
/// Configures whether or not serialization is deterministic.
/// </summary>
/// <remarks>
/// Deterministic serialization guarantees that for a given binary, equal messages (defined by the
/// equals methods in protos) will always be serialized to the same bytes. This implies:
/// <list type="bullet">
/// <item><description>Repeated serialization of a message will return the same bytes.</description></item>
/// <item><description>Different processes of the same binary (which may be executing on different machines)
/// will serialize equal messages to the same bytes.</description></item>
/// </list>
/// Note the deterministic serialization is NOT canonical across languages; it is also unstable
/// across different builds with schema changes due to unknown fields. Users who need canonical
/// serialization, e.g. persistent storage in a canonical form, fingerprinting, etc, should define
/// their own canonicalization specification and implement the serializer using reflection APIs
/// rather than relying on this API.
/// Once set, the serializer will: (Note this is an implementation detail and may subject to
/// change in the future)
/// <list type="bullet">
/// <item><description>Sort map entries by keys in lexicographical order or numerical order. Note: For string
/// keys, the order is based on comparing the UTF-16 code unit value of each character in the strings.
/// The order may be different from the deterministic serialization in other languages where
/// maps are sorted on the lexicographical order of the UTF8 encoded keys.</description></item>
/// </list>
/// </remarks>
public bool Deterministic { get; set; }

#region Writing of values (not including tags)

/// <summary>
Expand Down Expand Up @@ -462,7 +489,7 @@ public void WriteRawTag(byte b1, byte b2, byte b3, byte b4, byte b5)
#endregion

#region Underlying writing primitives

/// <summary>
/// Writes a 32 bit value as a varint. The fast route is taken when
/// there's enough buffer space left to whizz through without checking
Expand Down
42 changes: 38 additions & 4 deletions csharp/src/Google.Protobuf/Collections/MapField.cs
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ bool ICollection<KeyValuePair<TKey, TValue>>.Remove(KeyValuePair<TKey, TValue> i
/// Returns a hash code for this instance.
/// </summary>
/// <returns>
/// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
/// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table.
/// </returns>
public override int GetHashCode()
{
Expand Down Expand Up @@ -432,14 +432,37 @@ public void WriteTo(CodedOutputStream output, Codec codec)
WriteContext.Initialize(output, out WriteContext ctx);
try
{
WriteTo(ref ctx, codec);
IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;

if (output.Deterministic)
{
listToWrite = GetSortedListCopy(list);
}
WriteTo(ref ctx, codec, listToWrite);
}
finally
{
ctx.CopyStateTo(output);
}
}

internal IEnumerable<KeyValuePair<TKey, TValue>> GetSortedListCopy(IEnumerable<KeyValuePair<TKey, TValue>> listToSort)
{
// We can't sort the list in place, as that would invalidate the linked list.
// Instead, we create a new list, sort that, and then write it out.
var listToWrite = new List<KeyValuePair<TKey, TValue>>(listToSort);
listToWrite.Sort((pair1, pair2) =>
{
if (typeof(TKey) == typeof(string))
{
// Use Ordinal, otherwise Comparer<string>.Default uses StringComparer.CurrentCulture
return StringComparer.Ordinal.Compare(pair1.Key.ToString(), pair2.Key.ToString());
}
return Comparer<TKey>.Default.Compare(pair1.Key, pair2.Key);
});
return listToWrite;
}

/// <summary>
/// Writes the contents of this map to the given write context, using the specified codec
/// to encode each entry.
Expand All @@ -449,7 +472,18 @@ public void WriteTo(CodedOutputStream output, Codec codec)
[SecuritySafeCritical]
public void WriteTo(ref WriteContext ctx, Codec codec)
{
foreach (var entry in list)
IEnumerable<KeyValuePair<TKey, TValue>> listToWrite = list;
if (ctx.state.CodedOutputStream?.Deterministic ?? false)
{
listToWrite = GetSortedListCopy(list);
}
WriteTo(ref ctx, codec, listToWrite);
}

[SecuritySafeCritical]
private void WriteTo(ref WriteContext ctx, Codec codec, IEnumerable<KeyValuePair<TKey, TValue>> listKvp)
{
foreach (var entry in listKvp)
{
ctx.WriteTag(codec.MapTag);

Expand Down Expand Up @@ -631,7 +665,7 @@ internal MapView(
this.containsCheck = containsCheck;
}

public int Count => parent.Count;
public int Count => parent.Count;

public bool IsReadOnly => true;

Expand Down

0 comments on commit 96100bf

Please sign in to comment.