Skip to content

Commit

Permalink
Include state in result of RDF.Canonicalization.canonicalize/2
Browse files Browse the repository at this point in the history
  • Loading branch information
marcelotto committed Feb 29, 2024
1 parent db6b020 commit cf1eddd
Show file tree
Hide file tree
Showing 9 changed files with 107 additions and 34 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ Elixir versions < 1.12 are no longer supported
with the `:hash_algorithm` keyword option or the `:canon_hash_algorithm` application
runtime configuration.

### Changed

- `RDF.Canonicalization.canonicalize/2` now returns the canonicalized dataset in a
tuple along with the final state containing the _input blank node identifier map_ and
the _issued identifiers map_ as required by the RDF dataset canonicalization
specification

### Fixed

- `RDF.Dataset.put/3` with a `RDF.Dataset` input didn't respect the `:graph` option to
Expand Down
50 changes: 25 additions & 25 deletions lib/rdf/canonicalization/canonicalization.ex
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,9 @@ defmodule RDF.Canonicalization do
#{@hash_algorithm_config_doc}
## Example
iex> RDF.Graph.new([{~B<foo>, EX.p(), ~B<bar>}, {~B<bar>, EX.p(), ~B<foo>}])
...> |> RDF.Canonicalization.canonicalize()
RDF.Dataset.new([{~B<c14n0>, EX.p(), ~B<c14n1>}, {~B<c14n1>, EX.p(), ~B<c14n0>}])
"""
# Declared to return the canonicalized dataset together with the final
# canonicalization state; callers that only need the dataset destructure the
# first tuple element.
@spec canonicalize(RDF.Graph.t() | RDF.Dataset.t(), keyword) :: {RDF.Dataset.t(), State.t()}
def canonicalize(input, opts \\ []) do
  rdfc10(input, opts)
end
Expand All @@ -64,7 +59,9 @@ defmodule RDF.Canonicalization do
@spec isomorphic?(RDF.Graph.t() | RDF.Dataset.t(), RDF.Graph.t() | RDF.Dataset.t(), keyword) ::
        boolean
def isomorphic?(a, b, opts \\ []) do
  # canonicalize/2 yields `{dataset, state}`; only the datasets take part in
  # the comparison, the states are dropped.
  {canon_a, _} = canonicalize(a, opts)
  {canon_b, _} = canonicalize(b, opts)
  Dataset.equal?(canon_a, canon_b)
end

defp rdfc10(input, opts) do
Expand Down Expand Up @@ -136,24 +133,27 @@ defmodule RDF.Canonicalization do

# 6)
# Builds a new dataset from the statements in `data`, replacing every blank
# node with a blank node named by the identifier the canonical issuer of
# `state` returns for it. Statements without blank nodes are added unchanged.
#
# Returns `{dataset, state}` so the state is handed back to the caller
# together with the canonicalized dataset.
defp apply_canonicalization(state, data) do
  dataset =
    Enum.reduce(data, Dataset.new(), fn statement, canonicalized_data ->
      Dataset.add(
        canonicalized_data,
        if Statement.has_bnode?(statement) do
          Statement.map(statement, fn
            {_, %BlankNode{} = bnode} ->
              state.canonical_issuer
              |> IdentifierIssuer.identifier(bnode)
              |> BlankNode.new()

            {_, node} ->
              node
          end)
        else
          statement
        end
      )
    end)

  {dataset, state}
end

# see https://www.w3.org/TR/rdf-canon/#hash-1d-quads
Expand Down
2 changes: 2 additions & 0 deletions lib/rdf/canonicalization/state.ex
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ defmodule RDF.Canonicalization.State do
canonical_issuer: IdentifierIssuer.canonical(),
hash_algorithm: nil

@type t :: %__MODULE__{}

def new(input, opts) do
hash_algorithm = Keyword.get_lazy(opts, :hash_algorithm, &default_hash_algorithm/0)

Expand Down
17 changes: 16 additions & 1 deletion lib/rdf/model/dataset.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,22 @@ defmodule RDF.Dataset do

# Exposes the two-argument (default options) form of
# `RDF.Canonicalization.isomorphic?` on this module.
defdelegate isomorphic?(a, b), to: RDF.Canonicalization

@doc """
Canonicalizes the blank nodes of a dataset according to the RDF Dataset Canonicalization spec.

Accepts a `RDF.Dataset` or a `RDF.Graph`. Only the canonicalized dataset is
returned; the state that `RDF.Canonicalization.canonicalize/2` returns
alongside it is discarded.

## Example

    iex> RDF.Dataset.new([{~B<foo>, EX.p(), ~B<bar>}, {~B<bar>, EX.p(), ~B<foo>}])
    ...> |> RDF.Dataset.canonicalize()
    RDF.Dataset.new([{~B<c14n0>, EX.p(), ~B<c14n1>}, {~B<c14n1>, EX.p(), ~B<c14n0>}])

"""
@spec canonicalize(RDF.Dataset.t() | RDF.Graph.t(), keyword) :: RDF.Dataset.t()
def canonicalize(%graph_or_dataset{} = dataset, opts \\ [])
    when graph_or_dataset in [__MODULE__, Graph] do
  {canonicalized_dataset, _} = RDF.Canonicalization.canonicalize(dataset, opts)
  canonicalized_dataset
end

@doc """
Returns the aggregated prefixes of all graphs of `dataset` as a `RDF.PrefixMap`.
Expand Down
5 changes: 2 additions & 3 deletions lib/rdf/model/graph.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1345,9 +1345,8 @@ defmodule RDF.Graph do
"""
@spec canonicalize(RDF.Graph.t(), keyword) :: RDF.Graph.t()
def canonicalize(%__MODULE__{} = graph, opts \\ []) do
  # The canonicalization result is a `{dataset, state}` tuple; the state is
  # dropped and the default graph of the dataset is returned.
  {canonicalized_dataset, _} = RDF.Canonicalization.canonicalize(graph, opts)
  Dataset.default_graph(canonicalized_dataset)
end

@doc """
Expand Down
1 change: 1 addition & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ defmodule RDF.Mixfile do
{:credo, "~> 1.7", only: [:dev, :test], runtime: false},
{:dialyxir, "~> 1.4", only: [:dev, :test], runtime: false},
{:ex_doc, "~> 0.31", only: :dev, runtime: false},
{:jason, "~> 1.4", only: [:dev, :test]},
{:excoveralls, "~> 0.18", only: :test},
# This dependency is needed for ExCoveralls when OTP < 25
{:castore, "~> 1.0", only: :test},
Expand Down
33 changes: 29 additions & 4 deletions test/acceptance/canonicalization_w3c_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ defmodule RDF.Canonicalization.W3C.Test do
use ExUnit.Case, async: false
use EarlFormatter, test_suite: :rdf_canon

alias RDF.{TestSuite, NQuads, Canonicalization}
alias RDF.{TestSuite, NQuads, Canonicalization, BlankNode}
alias TestSuite.NS.RDFC

@path RDF.TestData.path("rdf-canon-tests")
Expand All @@ -24,9 +24,34 @@ defmodule RDF.Canonicalization.W3C.Test do
input = test_case_file(test_case, &TestSuite.test_input_file/1)
result = test_case_file(test_case, &TestSuite.test_output_file/1)

assert NQuads.read_file!(input, base: file_url)
|> Canonicalization.canonicalize(hash_algorithm_opts(test_case)) ==
NQuads.read_file!(result)
assert {canonicalized_dataset, _} =
NQuads.read_file!(input, base: file_url)
|> Canonicalization.canonicalize(hash_algorithm_opts(test_case))

assert canonicalized_dataset == NQuads.read_file!(result)
end
end)

# Map tests: the expected output file is JSON that is decoded and compared
# with the issued identifiers of the canonical issuer in the final state,
# keyed by the value of each blank node.
for test_case <- TestSuite.test_cases(@manifest, RDFC.RDFC10MapTest) do
  @tag test_case: test_case
  test TestSuite.test_title(test_case), %{test_case: test_case} do
    base = test_case |> TestSuite.test_input_file() |> to_string()
    input_path = test_case_file(test_case, &TestSuite.test_input_file/1)
    output_path = test_case_file(test_case, &TestSuite.test_output_file/1)
    expected_map = output_path |> File.read!() |> Jason.decode!()

    dataset = NQuads.read_file!(input_path, base: base)

    assert {_, state} = Canonicalization.canonicalize(dataset, hash_algorithm_opts(test_case))

    issued_map =
      Map.new(state.canonical_issuer.issued_identifiers, fn {bnode, identifier} ->
        {BlankNode.value(bnode), identifier}
      end)

    assert issued_map == expected_map
  end
end

Expand Down
2 changes: 1 addition & 1 deletion test/support/test_suite.ex
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ defmodule RDF.TestSuite do

# Vocabulary of the rdf-canon test manifest. `RDFC10MapTest` is needed to
# select the map test cases in the W3C acceptance test.
defvocab RDFC,
  base_iri: "https://w3c.github.io/rdf-canon/tests/vocab#",
  terms: ~w[RDFC10EvalTest RDFC10NegativeEvalTest RDFC10MapTest hashAlgorithm]
end

@compile {:no_warn_undefined, RDF.TestSuite.NS.MF}
Expand Down
24 changes: 24 additions & 0 deletions test/unit/canonicalization/canonicalization_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,31 @@ defmodule RDF.CanonicalizationTest do

doctest RDF.Canonicalization

alias RDF.Canonicalization

describe "canonicalize/1" do
test "returns a tuple with the state" do
  input_graph = Graph.new([{~B<foo>, EX.p(), ~B<bar>}, {~B<bar>, EX.p(), ~B<foo>}])

  expected_dataset =
    Dataset.new([{~B<c14n0>, EX.p(), ~B<c14n1>}, {~B<c14n1>, EX.p(), ~B<c14n0>}])

  # First element: the canonicalized dataset; second: the final state.
  assert {^expected_dataset, %Canonicalization.State{} = state} =
           Canonicalization.canonicalize(input_graph)

  assert %Canonicalization.State{hash_algorithm: :sha256} = state

  assert %Canonicalization.IdentifierIssuer{
           identifier_prefix: "c14n",
           issued_identifiers: %{~B<bar> => "c14n0", ~B<foo> => "c14n1"}
         } = state.canonical_issuer
end

test "the canonicalization of a RDF.Graph is equal to this RDF.Graph in a canonicalized RDF.Dataset" do
graph =
Graph.build do
Expand Down

0 comments on commit cf1eddd

Please sign in to comment.