From 8bac660f91cfc211792af97289804b847df532b4 Mon Sep 17 00:00:00 2001 From: Eric Myhre Date: Mon, 6 Dec 2021 11:07:58 -0600 Subject: [PATCH 1/2] dagcbor: coerce undef to null. CBOR's "undefined" token -- byte 23; see https://datatracker.ietf.org/doc/html/rfc8949#section-5.7 or https://datatracker.ietf.org/doc/html/rfc8949#section-3.3 table 4 -- is not generally considered part of DAG-CBOR (which is generally a subset of CBOR). However, it may be desirable to allow parsing data flagged as DAG-CBOR while coercing any "undefined" tokens to a "null", even if considering it noncanonical. Because DAG-CBOR was not well specified early on, and because libraries existed for a considerable period of time which would readily emit data that we would not call DAG-CBOR, data exists in the wild which encoded these tokens, and links to them with an indicator that it's DAG-CBOR. We may wish to continue allowing these documents to be parsed. This does *not* mean that our DAG-CBOR codec will emit "undefined" tokens. That would not be neither canonical nor valid DAG-CBOR, so we don't want to; and it would also not be possible to describe with this library in the first place (it's not in the IPLD data model; so how would you?). This does *not* mean that you can *read* DAG-CBOR data with this codec and see "undefined" tokens distinctly. Again, they're not in the IPLD data model, so this library API (intentionally) does not have the ability to describe that. With this change, an "undefined" token will be allowed by the parser, but will be described as "null" in terms of the IPLD data model, and not be distinguishable from any other null. A change to the DAG-CBOR specs may be required if we decide to embrace this behavior and expansion of what we consider non-canonical-but-valid DAG-CBOR. --- codec/dagcbor/unmarshal.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/codec/dagcbor/unmarshal.go b/codec/dagcbor/unmarshal.go index 759e3cf1..62e29a02 100644 --- a/codec/dagcbor/unmarshal.go +++ b/codec/dagcbor/unmarshal.go @@ -49,7 +49,9 @@ func (cfg DecodeOptions) Decode(na datamodel.NodeAssembler, r io.Reader) error { return na2.DecodeDagCbor(r) } // Okay, generic builder path. - return Unmarshal(na, cbor.NewDecoder(cbor.DecodeOptions{}, r), cfg) + return Unmarshal(na, cbor.NewDecoder(cbor.DecodeOptions{ + CoerceUndefToNull: true, + }, r), cfg) } // Future work: we would like to remove the Unmarshal function, From 0683bbe320645b42a0da4e678f9715851f882e01 Mon Sep 17 00:00:00 2001 From: Eric Myhre Date: Tue, 7 Dec 2021 11:38:36 -0600 Subject: [PATCH 2/2] dagcbor: test that undefined token parses and becomes treated as null. (Made a one-off test for this. It should also be a fixture in the ipld/ipld metarepo, but we don't already have those wired up, so I'm making the more incremental diff today.) --- codec/dagcbor/unmarshal_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/codec/dagcbor/unmarshal_test.go b/codec/dagcbor/unmarshal_test.go index 7d6557df..d0a8d815 100644 --- a/codec/dagcbor/unmarshal_test.go +++ b/codec/dagcbor/unmarshal_test.go @@ -4,6 +4,8 @@ import ( "strings" "testing" + "github.com/ipld/go-ipld-prime/datamodel" + qt "github.com/frankban/quicktest" "github.com/ipld/go-ipld-prime/node/basicnode" @@ -38,4 +40,13 @@ func TestFunBlocks(t *testing.T) { err := Decode(nb, buf) qt.Assert(t, err, qt.Equals, ErrAllocationBudgetExceeded) }) + t.Run("undef", func(t *testing.T) { + // This fixture tests that we tolerate cbor's "undefined" token (even though it's noncanonical and you shouldn't use it), + // and that it becomes a null in the data model level. + buf := strings.NewReader("\xf7") + nb := basicnode.Prototype.Any.NewBuilder() + err := Decode(nb, buf) + qt.Assert(t, err, qt.IsNil) + qt.Assert(t, nb.Build().Kind(), qt.Equals, datamodel.Kind_Null) + }) }