Skip to content

Commit

Permalink
Correctly handle quads with the default graph name
Browse files Browse the repository at this point in the history
Fixes #613. Closes #612.
  • Loading branch information
mmarx committed Feb 5, 2025
1 parent a9e2532 commit 4fe042c
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 37 deletions.
7 changes: 7 additions & 0 deletions nemo/src/io/formats/rdf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ use crate::{
use super::FileFormatMeta;
use super::{ExportHandler, FormatBuilder, ImportHandler, TableWriter};

/// IRI that Nemo uses to denote the default graph when handling RDF data with
/// named graphs (quads).
///
/// The RDF reader emits this IRI for quads that carry no graph name, and the
/// RDF writer maps it back to the default graph when exporting quads.
///
/// SPARQL 1.1 does not provide a standard identifier for this purpose. If a
/// future SPARQL or RDF version adds one, this value could be aligned with it.
const DEFAULT_GRAPH_IRI: &str = "tag:nemo:defaultgraph";

/// The different supported variants of the RDF format.
#[derive(Assoc, Debug, Clone, Copy, PartialEq, Eq, VariantArray)]
#[func(pub fn media_type(&self) -> &'static str)]
Expand Down
11 changes: 2 additions & 9 deletions nemo/src/io/formats/rdf/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,9 @@ use crate::io::formats::PROGRESS_NOTIFY_INCREMENT;
use super::{
error::RdfFormatError,
value_format::{RdfValueFormat, RdfValueFormats},
RdfVariant,
RdfVariant, DEFAULT_GRAPH_IRI,
};

/// IRI to be used for the default graph used by Nemo when loading RDF data with
/// named graphs (quads).
///
/// SPARQL 1.1 has failed to provide any standard identifier for this purpose.
/// If future SPARQL or RDF versions are adding this, we could align accordingly.
const DEFAULT_GRAPH: &str = "tag:nemo:defaultgraph";

/// A [TableProvider] for RDF 1.1 files containing triples.
pub(super) struct RdfReader {
/// Buffer from which content is read
Expand Down Expand Up @@ -157,7 +150,7 @@ impl RdfReader {
value: Option<GraphName<'_>>,
) -> Result<AnyDataValue, RdfFormatError> {
match value {
None => Ok(AnyDataValue::new_iri(DEFAULT_GRAPH.to_string())),
None => Ok(AnyDataValue::new_iri(DEFAULT_GRAPH_IRI.to_string())),
Some(GraphName::NamedNode(nn)) => Ok(Self::datavalue_from_named_node(nn)),
Some(GraphName::BlankNode(bn)) => {
Ok(Self::datavalue_from_blank_node(bnode_map, tuple_writer, bn))
Expand Down
82 changes: 54 additions & 28 deletions nemo/src/io/formats/rdf/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{

use super::{
value_format::{RdfValueFormat, RdfValueFormats},
RdfVariant,
RdfVariant, DEFAULT_GRAPH_IRI,
};

/// Private struct to record the type of an RDF term that
Expand All @@ -31,13 +31,43 @@ enum RdfTermType {
SimpleStringLiteral,
}

/// Owned representation of the graph name of a quad that is being exported.
///
/// Keeping the default graph as its own variant (rather than as a special
/// IRI string) lets the writer emit such quads without any graph name.
#[derive(Debug, Default)]
enum QuadGraphName {
/// The default graph, i.e. the quad has no explicit graph name.
/// This is also the value used by `Default`.
#[default]
DefaultGraph,
/// A graph named by an IRI; the payload is the IRI string.
NamedNode(String),
/// A graph named by a blank node; the payload is the blank node id.
BlankNode(String),
}

/// Error signalling that a data value cannot be used as the graph name of a quad.
///
/// Produced by the `TryFrom<&AnyDataValue>` conversion into `QuadGraphName`
/// for values that are neither IRIs nor nulls.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct InvalidGraphNameError;

impl std::fmt::Display for InvalidGraphNameError {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("value cannot be used as an RDF graph name")
    }
}

// Implementing the standard error trait lets callers propagate this error
// with `?` into `Box<dyn Error>` or wrap it as a source of other errors.
impl std::error::Error for InvalidGraphNameError {}

impl TryFrom<&AnyDataValue> for QuadGraphName {
    type Error = InvalidGraphNameError;

    /// Derives the graph name of a quad from a data value.
    ///
    /// * An IRI equal to [DEFAULT_GRAPH_IRI] becomes [QuadGraphName::DefaultGraph].
    /// * Any other IRI becomes [QuadGraphName::NamedNode].
    /// * A null becomes [QuadGraphName::BlankNode], using its lexical value as id.
    ///
    /// # Errors
    ///
    /// Returns [InvalidGraphNameError] for values of any other domain.
    fn try_from(value: &AnyDataValue) -> Result<Self, Self::Error> {
        let graph_name = match value.value_domain() {
            ValueDomain::Iri => match value.to_iri_unchecked() {
                // Nemo's marker IRI stands for "no graph name".
                iri if iri == DEFAULT_GRAPH_IRI => Self::DefaultGraph,
                iri => Self::NamedNode(iri),
            },
            ValueDomain::Null => Self::BlankNode(value.lexical_value()),
            _ => return Err(InvalidGraphNameError),
        };

        Ok(graph_name)
    }
}

/// Struct to store information of one quad (or triple) for export.
/// This is necessary since all RIO RDF term implementations use `&str`
/// pointers internally, that must be owned elsewhere.
#[derive(Debug, Default)]
struct QuadBuffer {
graph_name_is_blank: bool,
graph_name: String,
graph_name: QuadGraphName,
subject_is_blank: bool,
subject: String,
predicate: String,
Expand Down Expand Up @@ -88,15 +118,15 @@ impl<'a> QuadBuffer {
}
}

fn graph_name(&'a self) -> GraphName<'a> {
if self.graph_name_is_blank {
GraphName::BlankNode(BlankNode {
id: self.graph_name.as_str(),
})
} else {
GraphName::NamedNode(NamedNode {
iri: self.graph_name.as_str(),
})
/// Returns the buffered graph name as a borrowed RIO term.
///
/// The default graph is reported as `None`; named and blank-node graphs
/// are wrapped in `Some`, borrowing their strings from this buffer.
fn graph_name(&'a self) -> Option<GraphName<'a>> {
    let term = match &self.graph_name {
        QuadGraphName::DefaultGraph => return None,
        QuadGraphName::NamedNode(iri) => GraphName::NamedNode(NamedNode { iri: iri.as_str() }),
        QuadGraphName::BlankNode(id) => GraphName::BlankNode(BlankNode { id: id.as_str() }),
    };

    Some(term)
}

Expand Down Expand Up @@ -170,20 +200,13 @@ impl<'a> QuadBuffer {
true
}

fn set_graph_name_from_datavalue(&mut self, datavalue: &AnyDataValue) -> bool {
match datavalue.value_domain() {
ValueDomain::Iri => {
self.graph_name = datavalue.to_iri_unchecked();
self.graph_name_is_blank = false;
true
}
ValueDomain::Null => {
self.graph_name = datavalue.lexical_value();
self.graph_name_is_blank = true;
true
}
_ => false,
}
/// Stores the graph name described by `datavalue` in this buffer.
///
/// # Errors
///
/// Returns [InvalidGraphNameError] if `datavalue` cannot be converted into
/// a [QuadGraphName]; the buffered graph name is left untouched in that case.
fn set_graph_name_from_datavalue(
    &mut self,
    datavalue: &AnyDataValue,
) -> Result<(), InvalidGraphNameError> {
    QuadGraphName::try_from(datavalue).map(|graph_name| self.graph_name = graph_name)
}
}

Expand Down Expand Up @@ -318,14 +341,17 @@ impl RdfWriter {
if !buffer.set_object_from_datavalue(&record[o_pos]) {
continue;
}
if !buffer.set_graph_name_from_datavalue(&record[g_pos]) {
if buffer
.set_graph_name_from_datavalue(&record[g_pos])
.is_err()
{
continue;
}
if let Err(e) = formatter.format(&Quad {
subject: buffer.subject(),
predicate: buffer.predicate(),
object: buffer.object(),
graph_name: Some(buffer.graph_name()),
graph_name: buffer.graph_name(),
}) {
log::debug!("failed to write quad: {e}");
drop_count += 1;
Expand Down

0 comments on commit 4fe042c

Please sign in to comment.