From cf1edddbc61fa02668048281f7e2ac3d966d6da5 Mon Sep 17 00:00:00 2001 From: Marcel Otto Date: Thu, 29 Feb 2024 14:14:53 +0100 Subject: [PATCH] Include state in result of RDF.Canonicalization.canonicalize/2 --- CHANGELOG.md | 7 +++ lib/rdf/canonicalization/canonicalization.ex | 50 +++++++++---------- lib/rdf/canonicalization/state.ex | 2 + lib/rdf/model/dataset.ex | 17 ++++++- lib/rdf/model/graph.ex | 5 +- mix.exs | 1 + test/acceptance/canonicalization_w3c_test.exs | 33 ++++++++++-- test/support/test_suite.ex | 2 +- .../canonicalization_test.exs | 24 +++++++++ 9 files changed, 107 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 795259f2..962b0771 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,13 @@ Elixir versions < 1.12 are no longer supported with the `:hash_algorithm` keyword option or the `:canon_hash_algorithm` application runtime configuration. +### Changed + +- `RDF.Canonicalization.canonicalize/2` now returns the canonicalized dataset in a + tuple along with final state containing the _input blank node identifier map_ and + the _issued identifiers map_ as required by the RDF dataset canonicalization + specification + ### Fixed - `RDF.Dataset.put/3` with a `RDF.Dataset` input didn't respect the `:graph` option to diff --git a/lib/rdf/canonicalization/canonicalization.ex b/lib/rdf/canonicalization/canonicalization.ex index 53474032..7782ee1a 100644 --- a/lib/rdf/canonicalization/canonicalization.ex +++ b/lib/rdf/canonicalization/canonicalization.ex @@ -30,14 +30,9 @@ defmodule RDF.Canonicalization do #{@hash_algorithm_config_doc} - ## Example - - iex> RDF.Graph.new([{~B, EX.p(), ~B}, {~B, EX.p(), ~B}]) - ...> |> RDF.Canonicalization.canonicalize() - RDF.Dataset.new([{~B, EX.p(), ~B}, {~B, EX.p(), ~B}]) """ - @spec canonicalize(RDF.Graph.t() | RDF.Dataset.t(), keyword) :: RDF.Dataset.t() + @spec canonicalize(RDF.Graph.t() | RDF.Dataset.t(), keyword) :: {RDF.Dataset.t(), State.t()} def canonicalize(input, opts \\ []) do rdfc10(input, opts) end @@ -64,7 +59,9 @@ defmodule RDF.Canonicalization do @spec isomorphic?(RDF.Graph.t() | RDF.Dataset.t(), RDF.Graph.t() | RDF.Dataset.t(), keyword) :: boolean def isomorphic?(a, b, opts \\ []) do - a |> canonicalize(opts) |> Dataset.equal?(canonicalize(b, opts)) + {canon_a, _} = canonicalize(a, opts) + {canon_b, _} = canonicalize(b, opts) + Dataset.equal?(canon_a, canon_b) end defp rdfc10(input, opts) do @@ -136,24 +133,27 @@ defmodule RDF.Canonicalization do # 6) defp apply_canonicalization(state, data) do - Enum.reduce(data, Dataset.new(), fn statement, canonicalized_data -> - Dataset.add( - canonicalized_data, - if Statement.has_bnode?(statement) do - Statement.map(statement, fn - {_, %BlankNode{} = bnode} -> - state.canonical_issuer - |> IdentifierIssuer.identifier(bnode) - |> BlankNode.new() - - {_, node} -> - node - end) - else - statement - end - ) - end) + dataset = + Enum.reduce(data, Dataset.new(), fn statement, canonicalized_data -> + Dataset.add( + canonicalized_data, + if Statement.has_bnode?(statement) do + Statement.map(statement, fn + {_, %BlankNode{} = bnode} -> + state.canonical_issuer + |> IdentifierIssuer.identifier(bnode) + |> BlankNode.new() + + {_, node} -> + node + end) + else + statement + end + ) + end) + + {dataset, state} end # see https://www.w3.org/TR/rdf-canon/#hash-1d-quads diff --git a/lib/rdf/canonicalization/state.ex b/lib/rdf/canonicalization/state.ex index d005e302..d9381b10 100644 --- a/lib/rdf/canonicalization/state.ex +++ b/lib/rdf/canonicalization/state.ex @@ -13,6 +13,8 @@ defmodule RDF.Canonicalization.State do canonical_issuer: IdentifierIssuer.canonical(), hash_algorithm: nil + @type t :: %__MODULE__{} + def new(input, opts) do hash_algorithm = Keyword.get_lazy(opts, :hash_algorithm, &default_hash_algorithm/0) diff --git a/lib/rdf/model/dataset.ex b/lib/rdf/model/dataset.ex index f2db3d7a..20b3aaca 100644 --- a/lib/rdf/model/dataset.ex +++ b/lib/rdf/model/dataset.ex @@ -1029,7 +1029,22 @@ defmodule RDF.Dataset do defdelegate isomorphic?(a, b), to: RDF.Canonicalization - defdelegate canonicalize(input), to: RDF.Canonicalization + @doc """ + Canonicalizes the blank nodes of a dataset according to the RDF Dataset Canonicalization spec. + + ## Example + + iex> RDF.Dataset.new([{~B, EX.p(), ~B}, {~B, EX.p(), ~B}]) + ...> |> RDF.Dataset.canonicalize() + RDF.Dataset.new([{~B, EX.p(), ~B}, {~B, EX.p(), ~B}]) + + """ + @spec canonicalize(RDF.Dataset.t() | RDF.Graph.t(), keyword) :: RDF.Dataset.t() + def canonicalize(%graph_or_dataset{} = dataset, opts \\ []) + when graph_or_dataset in [__MODULE__, Graph] do + {canonicalized_dataset, _} = RDF.Canonicalization.canonicalize(dataset, opts) + canonicalized_dataset + end @doc """ Returns the aggregated prefixes of all graphs of `dataset` as a `RDF.PrefixMap`. diff --git a/lib/rdf/model/graph.ex b/lib/rdf/model/graph.ex index 75761d6a..0baa4396 100644 --- a/lib/rdf/model/graph.ex +++ b/lib/rdf/model/graph.ex @@ -1345,9 +1345,8 @@ defmodule RDF.Graph do """ @spec canonicalize(RDF.Graph.t(), keyword) :: RDF.Graph.t() def canonicalize(%__MODULE__{} = graph, opts \\ []) do - graph - |> RDF.Canonicalization.canonicalize(opts) - |> Dataset.default_graph() + {canonicalized_dataset, _} = RDF.Canonicalization.canonicalize(graph, opts) + Dataset.default_graph(canonicalized_dataset) end @doc """ diff --git a/mix.exs b/mix.exs index fd90100e..606d6f2b 100644 --- a/mix.exs +++ b/mix.exs @@ -77,6 +77,7 @@ defmodule RDF.Mixfile do {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, {:dialyxir, "~> 1.4", only: [:dev, :test], runtime: false}, {:ex_doc, "~> 0.31", only: :dev, runtime: false}, + {:jason, "~> 1.4", only: [:dev, :test]}, {:excoveralls, "~> 0.18", only: :test}, # This dependency is needed for ExCoveralls when OTP < 25 {:castore, "~> 1.0", only: :test}, diff --git a/test/acceptance/canonicalization_w3c_test.exs b/test/acceptance/canonicalization_w3c_test.exs index 87a65c4f..2372986e 100644 --- a/test/acceptance/canonicalization_w3c_test.exs +++ b/test/acceptance/canonicalization_w3c_test.exs @@ -8,7 +8,7 @@ defmodule RDF.Canonicalization.W3C.Test do use ExUnit.Case, async: false use EarlFormatter, test_suite: :rdf_canon - alias RDF.{TestSuite, NQuads, Canonicalization} + alias RDF.{TestSuite, NQuads, Canonicalization, BlankNode} alias TestSuite.NS.RDFC @path RDF.TestData.path("rdf-canon-tests") @@ -24,9 +24,34 @@ defmodule RDF.Canonicalization.W3C.Test do input = test_case_file(test_case, &TestSuite.test_input_file/1) result = test_case_file(test_case, &TestSuite.test_output_file/1) - assert NQuads.read_file!(input, base: file_url) - |> Canonicalization.canonicalize(hash_algorithm_opts(test_case)) == - NQuads.read_file!(result) + assert {canonicalized_dataset, _} = + NQuads.read_file!(input, base: file_url) + |> Canonicalization.canonicalize(hash_algorithm_opts(test_case)) + + assert canonicalized_dataset == NQuads.read_file!(result) + end + end) + + TestSuite.test_cases(@manifest, RDFC.RDFC10MapTest) + |> Enum.each(fn test_case -> + @tag test_case: test_case + test TestSuite.test_title(test_case), %{test_case: test_case} do + file_url = to_string(TestSuite.test_input_file(test_case)) + input = test_case_file(test_case, &TestSuite.test_input_file/1) + + result = + test_case + |> test_case_file(&TestSuite.test_output_file/1) + |> File.read!() + |> Jason.decode!() + + assert {_, state} = + NQuads.read_file!(input, base: file_url) + |> Canonicalization.canonicalize(hash_algorithm_opts(test_case)) + + assert Map.new(state.canonical_issuer.issued_identifiers, fn + {id, issued} -> {BlankNode.value(id), issued} + end) == result end end) diff --git a/test/support/test_suite.ex b/test/support/test_suite.ex index 62024a28..a3204880 100644 --- a/test/support/test_suite.ex +++ b/test/support/test_suite.ex @@ -26,7 +26,7 @@ defmodule RDF.TestSuite do defvocab RDFC, base_iri: "https://w3c.github.io/rdf-canon/tests/vocab#", - terms: ~w[RDFC10EvalTest hashAlgorithm] + terms: ~w[RDFC10EvalTest RDFC10NegativeEvalTest RDFC10MapTest hashAlgorithm] end @compile {:no_warn_undefined, RDF.TestSuite.NS.MF} diff --git a/test/unit/canonicalization/canonicalization_test.exs b/test/unit/canonicalization/canonicalization_test.exs index 41412f97..0b8c8787 100644 --- a/test/unit/canonicalization/canonicalization_test.exs +++ b/test/unit/canonicalization/canonicalization_test.exs @@ -3,7 +3,31 @@ defmodule RDF.CanonicalizationTest do doctest RDF.Canonicalization + alias RDF.Canonicalization + describe "canonicalize/1" do + test "returns a tuple with the state" do + expected_dataset = + Dataset.new([{~B, EX.p(), ~B}, {~B, EX.p(), ~B}]) + + assert { + ^expected_dataset, + %Canonicalization.State{ + canonical_issuer: %Canonicalization.IdentifierIssuer{ + identifier_prefix: "c14n", + issued_identifiers: %{~B => "c14n0", ~B => "c14n1"} + }, + hash_algorithm: :sha256 + } + } = + [ + {~B, EX.p(), ~B}, + {~B, EX.p(), ~B} + ] + |> Graph.new() + |> Canonicalization.canonicalize() + end + test "the canonicalization of a RDF.Graph is equal to this RDF.Graph in a canonicalized RDF.Dataset" do graph = Graph.build do