Include state in result of RDF.Canonicalization.canonicalize/2

rdf-elixir · Feb 29, 2024 · cf1eddd · cf1eddd
1 parent db6b020
commit cf1eddd
Show file tree

Hide file tree

Showing 9 changed files with 107 additions and 34 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -29,6 +29,13 @@ Elixir versions < 1.12 are no longer supported
   with the `:hash_algorithm` keyword option or the `:canon_hash_algorithm` application 
   runtime configuration.
 
+### Changed
+
+- `RDF.Canonicalization.canonicalize/2` now returns the canonicalized dataset in a
+  tuple along with the final state containing the _input blank node identifier map_ and
+  the _issued identifiers map_, as required by the RDF dataset canonicalization
+  specification.
+
 ### Fixed
 
 - `RDF.Dataset.put/3` with a `RDF.Dataset` input didn't respect the `:graph` option to 

diff --git a/lib/rdf/canonicalization/canonicalization.ex b/lib/rdf/canonicalization/canonicalization.ex
@@ -30,14 +30,9 @@ defmodule RDF.Canonicalization do
 
   #{@hash_algorithm_config_doc}
 
-  ## Example
-
-      iex> RDF.Graph.new([{~B<foo>, EX.p(), ~B<bar>}, {~B<bar>, EX.p(), ~B<foo>}])
-      ...> |> RDF.Canonicalization.canonicalize()
-      RDF.Dataset.new([{~B<c14n0>, EX.p(), ~B<c14n1>}, {~B<c14n1>, EX.p(), ~B<c14n0>}])
 
   """
-  @spec canonicalize(RDF.Graph.t() | RDF.Dataset.t(), keyword) :: RDF.Dataset.t()
+  @spec canonicalize(RDF.Graph.t() | RDF.Dataset.t(), keyword) :: {RDF.Dataset.t(), State.t()}
   def canonicalize(input, opts \\ []) do
     rdfc10(input, opts)
   end
@@ -64,7 +59,9 @@ defmodule RDF.Canonicalization do
   @spec isomorphic?(RDF.Graph.t() | RDF.Dataset.t(), RDF.Graph.t() | RDF.Dataset.t(), keyword) ::
           boolean
   def isomorphic?(a, b, opts \\ []) do
-    a |> canonicalize(opts) |> Dataset.equal?(canonicalize(b, opts))
+    {canon_a, _} = canonicalize(a, opts)
+    {canon_b, _} = canonicalize(b, opts)
+    Dataset.equal?(canon_a, canon_b)
   end
 
   defp rdfc10(input, opts) do
@@ -136,24 +133,27 @@ defmodule RDF.Canonicalization do
 
   # 6)
   defp apply_canonicalization(state, data) do
-    Enum.reduce(data, Dataset.new(), fn statement, canonicalized_data ->
-      Dataset.add(
-        canonicalized_data,
-        if Statement.has_bnode?(statement) do
-          Statement.map(statement, fn
-            {_, %BlankNode{} = bnode} ->
-              state.canonical_issuer
-              |> IdentifierIssuer.identifier(bnode)
-              |> BlankNode.new()
-
-            {_, node} ->
-              node
-          end)
-        else
-          statement
-        end
-      )
-    end)
+    dataset =
+      Enum.reduce(data, Dataset.new(), fn statement, canonicalized_data ->
+        Dataset.add(
+          canonicalized_data,
+          if Statement.has_bnode?(statement) do
+            Statement.map(statement, fn
+              {_, %BlankNode{} = bnode} ->
+                state.canonical_issuer
+                |> IdentifierIssuer.identifier(bnode)
+                |> BlankNode.new()
+
+              {_, node} ->
+                node
+            end)
+          else
+            statement
+          end
+        )
+      end)
+
+    {dataset, state}
   end
 
   # see https://www.w3.org/TR/rdf-canon/#hash-1d-quads

diff --git a/lib/rdf/canonicalization/state.ex b/lib/rdf/canonicalization/state.ex
@@ -13,6 +13,8 @@ defmodule RDF.Canonicalization.State do
             canonical_issuer: IdentifierIssuer.canonical(),
             hash_algorithm: nil
 
+  @type t :: %__MODULE__{}
+
   def new(input, opts) do
     hash_algorithm = Keyword.get_lazy(opts, :hash_algorithm, &default_hash_algorithm/0)
 

diff --git a/lib/rdf/model/dataset.ex b/lib/rdf/model/dataset.ex
@@ -1029,7 +1029,22 @@ defmodule RDF.Dataset do
 
   defdelegate isomorphic?(a, b), to: RDF.Canonicalization
 
-  defdelegate canonicalize(input), to: RDF.Canonicalization
+  @doc """
+  Canonicalizes the blank nodes of a dataset according to the RDF Dataset Canonicalization spec.
+
+  ## Example
+
+      iex> RDF.Dataset.new([{~B<foo>, EX.p(), ~B<bar>}, {~B<bar>, EX.p(), ~B<foo>}])
+      ...> |> RDF.Dataset.canonicalize()
+      RDF.Dataset.new([{~B<c14n0>, EX.p(), ~B<c14n1>}, {~B<c14n1>, EX.p(), ~B<c14n0>}])
+
+  """
+  @spec canonicalize(RDF.Dataset.t() | RDF.Graph.t(), keyword) :: RDF.Dataset.t()
+  def canonicalize(%graph_or_dataset{} = dataset, opts \\ [])
+      when graph_or_dataset in [__MODULE__, Graph] do
+    {canonicalized_dataset, _} = RDF.Canonicalization.canonicalize(dataset, opts)
+    canonicalized_dataset
+  end
 
   @doc """
   Returns the aggregated prefixes of all graphs of `dataset` as a `RDF.PrefixMap`.

diff --git a/lib/rdf/model/graph.ex b/lib/rdf/model/graph.ex
@@ -1345,9 +1345,8 @@ defmodule RDF.Graph do
   """
   @spec canonicalize(RDF.Graph.t(), keyword) :: RDF.Graph.t()
   def canonicalize(%__MODULE__{} = graph, opts \\ []) do
-    graph
-    |> RDF.Canonicalization.canonicalize(opts)
-    |> Dataset.default_graph()
+    {canonicalized_dataset, _} = RDF.Canonicalization.canonicalize(graph, opts)
+    Dataset.default_graph(canonicalized_dataset)
   end
 
   @doc """

diff --git a/mix.exs b/mix.exs
@@ -77,6 +77,7 @@ defmodule RDF.Mixfile do
       {:credo, "~> 1.7", only: [:dev, :test], runtime: false},
       {:dialyxir, "~> 1.4", only: [:dev, :test], runtime: false},
       {:ex_doc, "~> 0.31", only: :dev, runtime: false},
+      {:jason, "~> 1.4", only: [:dev, :test]},
       {:excoveralls, "~> 0.18", only: :test},
       # This dependency is needed for ExCoveralls when OTP < 25
       {:castore, "~> 1.0", only: :test},

diff --git a/test/acceptance/canonicalization_w3c_test.exs b/test/acceptance/canonicalization_w3c_test.exs
@@ -8,7 +8,7 @@ defmodule RDF.Canonicalization.W3C.Test do
   use ExUnit.Case, async: false
   use EarlFormatter, test_suite: :rdf_canon
 
-  alias RDF.{TestSuite, NQuads, Canonicalization}
+  alias RDF.{TestSuite, NQuads, Canonicalization, BlankNode}
   alias TestSuite.NS.RDFC
 
   @path RDF.TestData.path("rdf-canon-tests")
@@ -24,9 +24,34 @@ defmodule RDF.Canonicalization.W3C.Test do
       input = test_case_file(test_case, &TestSuite.test_input_file/1)
       result = test_case_file(test_case, &TestSuite.test_output_file/1)
 
-      assert NQuads.read_file!(input, base: file_url)
-             |> Canonicalization.canonicalize(hash_algorithm_opts(test_case)) ==
-               NQuads.read_file!(result)
+      assert {canonicalized_dataset, _} =
+               NQuads.read_file!(input, base: file_url)
+               |> Canonicalization.canonicalize(hash_algorithm_opts(test_case))
+
+      assert canonicalized_dataset == NQuads.read_file!(result)
+    end
+  end)
+
+  TestSuite.test_cases(@manifest, RDFC.RDFC10MapTest)
+  |> Enum.each(fn test_case ->
+    @tag test_case: test_case
+    test TestSuite.test_title(test_case), %{test_case: test_case} do
+      file_url = to_string(TestSuite.test_input_file(test_case))
+      input = test_case_file(test_case, &TestSuite.test_input_file/1)
+
+      result =
+        test_case
+        |> test_case_file(&TestSuite.test_output_file/1)
+        |> File.read!()
+        |> Jason.decode!()
+
+      assert {_, state} =
+               NQuads.read_file!(input, base: file_url)
+               |> Canonicalization.canonicalize(hash_algorithm_opts(test_case))
+
+      assert Map.new(state.canonical_issuer.issued_identifiers, fn
+               {id, issued} -> {BlankNode.value(id), issued}
+             end) == result
     end
   end)
 

diff --git a/test/support/test_suite.ex b/test/support/test_suite.ex
@@ -26,7 +26,7 @@ defmodule RDF.TestSuite do
 
     defvocab RDFC,
       base_iri: "https://w3c.github.io/rdf-canon/tests/vocab#",
-      terms: ~w[RDFC10EvalTest hashAlgorithm]
+      terms: ~w[RDFC10EvalTest RDFC10NegativeEvalTest RDFC10MapTest hashAlgorithm]
   end
 
   @compile {:no_warn_undefined, RDF.TestSuite.NS.MF}

diff --git a/test/unit/canonicalization/canonicalization_test.exs b/test/unit/canonicalization/canonicalization_test.exs
@@ -3,7 +3,31 @@ defmodule RDF.CanonicalizationTest do
 
   doctest RDF.Canonicalization
 
+  alias RDF.Canonicalization
+
   describe "canonicalize/1" do
+    test "returns a tuple with the state" do
+      expected_dataset =
+        Dataset.new([{~B<c14n0>, EX.p(), ~B<c14n1>}, {~B<c14n1>, EX.p(), ~B<c14n0>}])
+
+      assert {
+               ^expected_dataset,
+               %Canonicalization.State{
+                 canonical_issuer: %Canonicalization.IdentifierIssuer{
+                   identifier_prefix: "c14n",
+                   issued_identifiers: %{~B<bar> => "c14n0", ~B<foo> => "c14n1"}
+                 },
+                 hash_algorithm: :sha256
+               }
+             } =
+               [
+                 {~B<foo>, EX.p(), ~B<bar>},
+                 {~B<bar>, EX.p(), ~B<foo>}
+               ]
+               |> Graph.new()
+               |> Canonicalization.canonicalize()
+    end
+
     test "the canonicalization of a RDF.Graph is equal to this RDF.Graph in a canonicalized RDF.Dataset" do
       graph =
         Graph.build do