Skip to content

Commit

Permalink
Handle embedded nodes as RDF-star statements
Browse files Browse the repository at this point in the history
  • Loading branch information
rubensworks committed Jan 13, 2023
1 parent 79a5118 commit 0f54ecd
Show file tree
Hide file tree
Showing 11 changed files with 556 additions and 45 deletions.
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ Optionally, the following parameters can be set in the `JsonLdParser` constructo
* `normalizeLanguageTags`: Whether or not language tags should be normalized to lowercase. _(Default: `false` for JSON-LD 1.1 (and higher), `true` for JSON-LD 1.0)_
* `streamingProfileAllowOutOfOrderPlainType`: When the streaming profile flag is enabled, `@type` entries MUST come before other properties since they may define a type-scoped context. However, when this flag is enabled, `@type` entries that do NOT define a type-scoped context may appear anywhere just like a regular property. _(Default: `false`)_
* `skipContextValidation`: If JSON-LD context validation should be skipped. This is useful when parsing large contexts that are known to be valid. _(Default: `false`)_
* `rdfstar`: If embedded nodes and annotated objects should be parsed according to the [JSON-LD star specification](https://json-ld.github.io/json-ld-star/). _(Default: `true`)_
* `rdfstarReverseInEmbedded`: If embedded nodes in JSON-LD star can have reverse properties. _(Default: `false`)_

```javascript
new JsonLdParser({
Expand All @@ -179,6 +181,7 @@ new JsonLdParser({
defaultGraph: namedNode('http://example.org/graph'),
rdfDirection: 'i18n-datatype',
normalizeLanguageTags: true,
rdfstar: true,
});
```

Expand Down Expand Up @@ -239,8 +242,9 @@ Other documents will still be parsed correctly as well, with a slightly lower ef

## Streaming Profile

This parser adheres to both the [JSON-LD 1.1](https://www.w3.org/TR/json-ld/) specification
and the [JSON-LD 1.1 Streaming specification](https://w3c.github.io/json-ld-streaming/).
This parser adheres to the [JSON-LD 1.1](https://www.w3.org/TR/json-ld/) specification,
the [JSON-LD 1.1 Streaming](https://w3c.github.io/json-ld-streaming/) specification,
and the [JSON-LD star](https://json-ld.github.io/json-ld-star/) specification.

By default, this parser assumes that JSON-LD documents
are *not* in the [streaming document form](https://w3c.github.io/json-ld-streaming/#streaming-document-form).
Expand All @@ -261,6 +265,8 @@ This parser implements the following [JSON-LD specifications](https://json-ld.or
* JSON-LD 1.1 - Transform JSON-LD to RDF
* JSON-LD 1.1 - Error handling
* JSON-LD 1.1 - Streaming Transform JSON-LD to RDF
* [JSON-LD star](https://json-ld.github.io/json-ld-star/) - Transform JSON-LD star to RDF
* [JSON-LD star](https://json-ld.github.io/json-ld-star/) - Error handling

## Performance

Expand Down
22 changes: 12 additions & 10 deletions lib/JsonLdParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,13 @@ export class JsonLdParser extends Transform implements RDF.Sink<EventEmitter, RD
*/
public async flushBuffer(depth: number, keys: any[]) {
let subjects: RDF.Term[] = this.parsingContext.idStack[depth];
if (!subjects) {
const subjectsWasDefined = !!subjects;
if (!subjectsWasDefined) {
subjects = this.parsingContext.idStack[depth] = [ this.util.dataFactory.blankNode() ];
}

// Flush values at this level
const valueBuffer: { predicate: RDF.Term, object: RDF.Term, reverse: boolean }[] =
const valueBuffer: { predicate: RDF.Term, object: RDF.Term, reverse: boolean, isEmbedded: boolean }[] =
this.parsingContext.unidentifiedValuesBuffer[depth];
if (valueBuffer) {
for (const subject of subjects) {
Expand All @@ -336,13 +337,7 @@ export class JsonLdParser extends Transform implements RDF.Sink<EventEmitter, RD
// Flush values to stream if the graph @id is known
this.parsingContext.emittedStack[depth] = true;
for (const bufferedValue of valueBuffer) {
if (bufferedValue.reverse) {
this.parsingContext.emitQuad(depth, this.util.dataFactory.quad(
bufferedValue.object, bufferedValue.predicate, subject, graph));
} else {
this.parsingContext.emitQuad(depth, this.util.dataFactory.quad(
subject, bufferedValue.predicate, bufferedValue.object, graph));
}
this.util.emitQuadChecked(depth, subject, bufferedValue.predicate, bufferedValue.object, graph, bufferedValue.reverse, bufferedValue.isEmbedded);
}
}
} else {
Expand All @@ -355,12 +350,14 @@ export class JsonLdParser extends Transform implements RDF.Sink<EventEmitter, RD
object: subject,
predicate: bufferedValue.predicate,
subject: bufferedValue.object,
isEmbedded: bufferedValue.isEmbedded,
});
} else {
subGraphBuffer.push({
object: bufferedValue.object,
predicate: bufferedValue.predicate,
subject,
isEmbedded: bufferedValue.isEmbedded,
});
}
}
Expand Down Expand Up @@ -638,8 +635,13 @@ export interface IJsonLdParserOptions {
*/
skipContextValidation?: boolean;
/**
* If nested triples should be parsed according to the JSON-LD star specification.
* If embedded nodes and annotated objects should be parsed according to the JSON-LD star specification.
* Defaults to true.
*/
rdfstar?: boolean;
/**
* If embedded nodes may use reverse properties.
* Defaults to false.
*/
rdfstarReverseInEmbedded?: boolean;
}
10 changes: 6 additions & 4 deletions lib/ParsingContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export class ParsingContext {
public readonly normalizeLanguageTags?: boolean;
public readonly streamingProfileAllowOutOfOrderPlainType?: boolean;
public readonly rdfstar: boolean;
public readonly rdfstarReverseInEmbedded?: boolean;

// Stack indicating whether a depth has been touched.
public readonly processingStack: boolean[];
Expand Down Expand Up @@ -64,10 +65,10 @@ export class ParsingContext {
public readonly jsonLiteralStack: boolean[];
// Triples that don't know their subject @id yet.
// L0: stack depth; L1: values
public readonly unidentifiedValuesBuffer: { predicate: RDF.Term, object: RDF.Term, reverse: boolean }[][];
public readonly unidentifiedValuesBuffer: { predicate: RDF.Term, object: RDF.Term, reverse: boolean, isEmbedded: boolean }[][];
// Quads that don't know their graph @id yet.
// L0: stack depth; L1: values
public readonly unidentifiedGraphsBuffer: { subject: RDF.Term, predicate: RDF.Term, object: RDF.Term }[][];
public readonly unidentifiedGraphsBuffer: { subject: RDF.Term, predicate: RDF.Term, object: RDF.Term, isEmbedded: boolean }[][];

// Depths that should be still flushed
public pendingContainerFlushBuffers: { depth: number, keys: any[] }[];
Expand All @@ -94,6 +95,7 @@ export class ParsingContext {
this.normalizeLanguageTags = options.normalizeLanguageTags;
this.streamingProfileAllowOutOfOrderPlainType = options.streamingProfileAllowOutOfOrderPlainType;
this.rdfstar = options.rdfstar !== false;
this.rdfstarReverseInEmbedded = options.rdfstarReverseInEmbedded;

this.topLevelProperties = false;
this.activeProcessingMode = parseFloat(this.processingMode);
Expand Down Expand Up @@ -344,7 +346,7 @@ export class ParsingContext {
* @return {{predicate: Term; object: Term; reverse: boolean; isEmbedded: boolean}[]} An element of
* {@link ParsingContext.unidentifiedValuesBuffer}.
*/
public getUnidentifiedValueBufferSafe(depth: number): { predicate: RDF.Term, object: RDF.Term, reverse: boolean }[] {
public getUnidentifiedValueBufferSafe(depth: number): { predicate: RDF.Term, object: RDF.Term, reverse: boolean, isEmbedded: boolean }[] {
let buffer = this.unidentifiedValuesBuffer[depth];
if (!buffer) {
buffer = [];
Expand All @@ -359,7 +361,7 @@ export class ParsingContext {
* @return {{subject: Term; predicate: Term; object: Term; isEmbedded: boolean}[]} An element of
* {@link ParsingContext.unidentifiedGraphsBuffer}.
*/
public getUnidentifiedGraphBufferSafe(depth: number): { subject: RDF.Term, predicate: RDF.Term, object: RDF.Term }[] {
public getUnidentifiedGraphBufferSafe(depth: number): { subject: RDF.Term, predicate: RDF.Term, object: RDF.Term, isEmbedded: boolean }[] {
let buffer = this.unidentifiedGraphsBuffer[depth];
if (!buffer) {
buffer = [];
Expand Down
79 changes: 78 additions & 1 deletion lib/Util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,16 @@ export class Util {
return parentKey === '@reverse' !== Util.isContextValueReverse(context, key);
}

/**
* Check if a property is a direct child of an embedded node.
*
* In JSON-LD star, an embedded node is an object that is used as the value of `@id`,
* so a property is considered embedded exactly when its parent key is `@id`.
* NOTE(review): the comparison is against the literal keyword, so the caller presumably
* passes an already-unaliased parent key; confirm at call sites.
*
* @param {string} parentKey The parent key of the property.
* @return {boolean} True if the parent key is `@id`, false otherwise.
*/
public static isPropertyInEmbeddedNode(parentKey: string): boolean {
return parentKey === '@id';
}

/**
* Check if the given IRI is valid.
* @param {string} iri A potential IRI.
Expand Down Expand Up @@ -416,7 +426,18 @@ export class Util {
if (value["@type"] === '@vocab') {
return this.nullableTermToArray(this.createVocabOrBaseTerm(context, value["@id"]));
} else {
return this.nullableTermToArray(this.resourceToTerm(context, value["@id"]));
const valueId = value["@id"];
let valueTerm: RDF.Term | null;
if (typeof valueId === 'object') {
if (this.parsingContext.rdfstar) {
valueTerm = this.parsingContext.idStack[depth + 1][0];
} else {
throw new ErrorCoded(`Found illegal @id '${value}'`, ERROR_CODES.INVALID_ID_VALUE);
}
} else {
valueTerm = this.resourceToTerm(context, valueId);
}
return this.nullableTermToArray(valueTerm);
}
} else {
// Only make a blank node if at least one triple was emitted at the value's level.
Expand Down Expand Up @@ -883,4 +904,60 @@ export class Util {
return keyUnaliased === '@none' ? null : keyUnaliased;
}

/**
 * Reject reverse properties that occur inside an embedded node.
 *
 * Nothing happens unless the property is both embedded and reversed.
 * Even then, the check passes when the `rdfstarReverseInEmbedded` option
 * of the parsing context is enabled.
 *
 * @param key The current key, only used in the error message.
 * @param reverse If a reverse property is active.
 * @param isEmbedded If we're in an embedded node.
 * @throws ErrorCoded With code INVALID_EMBEDDED_NODE if the reverse property is not allowed.
 */
public validateReverseInEmbeddedNode(key: string, reverse: boolean, isEmbedded: boolean): void {
  // Only reversed properties within embedded nodes are of interest here
  if (!isEmbedded || !reverse) {
    return;
  }

  // The user explicitly opted in to reverse properties in embedded nodes
  if (this.parsingContext.rdfstarReverseInEmbedded) {
    return;
  }

  throw new ErrorCoded(`Illegal reverse property in embedded node in ${key}`,
    ERROR_CODES.INVALID_EMBEDDED_NODE);
}

/**
 * Build a quad from the given terms and either emit it, or, for embedded nodes,
 * store it as the identifier of the parent depth.
 *
 * For reverse properties, the object is first validated as a subject,
 * and subject and object swap positions in the resulting quad.
 *
 * Quads created in an embedded node are not emitted.
 * Instead, they are placed (without their graph) in the id stack at `depth - 1`,
 * so that the quad itself can act as a term one level up.
 *
 * @param depth The current depth.
 * @param subject The subject term (becomes the object if `reverse` is true).
 * @param predicate The predicate term.
 * @param object The object term (becomes the subject if `reverse` is true).
 * @param graph The graph term.
 * @param reverse If a reverse property is active.
 * @param isEmbedded If we're in an embedded node.
 * @throws ErrorCoded With code INVALID_EMBEDDED_NODE if the id stack
 *                    at `depth - 1` already holds a value.
 */
public emitQuadChecked(
  depth: number,
  subject: RDF.Term, predicate: RDF.Term, object: RDF.Term, graph: RDF.Term,
  reverse: boolean, isEmbedded: boolean,
): void {
  // Reverse properties swap subject and object, so the object must be a valid subject
  if (reverse) {
    this.validateReverseSubject(object);
  }
  const statement: RDF.BaseQuad = reverse
    ? this.dataFactory.quad(object, predicate, subject, graph)
    : this.dataFactory.quad(subject, predicate, object, graph);

  // The regular case: simply emit the quad
  if (!isEmbedded) {
    this.parsingContext.emitQuad(depth, statement);
    return;
  }

  // Embedded nodes don't inherit the active graph
  const embeddedStatement: RDF.BaseQuad = statement.graph.termType === 'DefaultGraph'
    ? statement
    : this.dataFactory.quad(statement.subject, statement.predicate, statement.object);

  // An embedded node may only contribute a single statement to its parent
  const parentDepth = depth - 1;
  if (this.parsingContext.idStack[parentDepth]) {
    throw new ErrorCoded(`Illegal multiple properties in an embedded node`,
      ERROR_CODES.INVALID_EMBEDDED_NODE);
  }

  this.parsingContext.idStack[parentDepth] = [ embeddedStatement ];
}

}
2 changes: 1 addition & 1 deletion lib/containerhandler/ContainerHandlerIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ export class ContainerHandlerIndex implements IContainerHandler {
// Otherwise, attach the index to the node identifier
for (const indexValue of indexValues) {
await EntryHandlerPredicate.handlePredicateObject(parsingContext, util, keys, depth + 1,
indexProperty, indexValue, false);
indexProperty, indexValue, false, false);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/containerhandler/ContainerHandlerType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ export class ContainerHandlerType implements IContainerHandler {
if (type) {
// Push the type to the stack using the rdf:type predicate
await EntryHandlerPredicate.handlePredicateObject(parsingContext, util, keys, depth + 1,
util.rdfType, type, false);
util.rdfType, type, false, false);
}

// Flush any pending flush buffers
Expand Down
36 changes: 18 additions & 18 deletions lib/entryhandler/EntryHandlerPredicate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ export class EntryHandlerPredicate implements IEntryHandler<boolean> {
* @param {Term} predicate The predicate.
* @param {Term} object The object.
* @param {boolean} reverse If the property is reversed.
* @param {boolean} isEmbedded If the property exists in an embedded node as direct child.
* @return {Promise<void>} A promise resolving when handling is done.
*/
public static async handlePredicateObject(parsingContext: ParsingContext, util: Util, keys: any[], depth: number,
predicate: RDF.Term, object: RDF.Term, reverse: boolean) {
predicate: RDF.Term, object: RDF.Term,
reverse: boolean, isEmbedded: boolean) {
const depthProperties: number = await util.getPropertiesDepth(keys, depth);
const depthOffsetGraph = await util.getDepthOffsetGraph(depth, keys);
const depthPropertiesGraph: number = depth - depthOffsetGraph;
Expand All @@ -39,41 +41,31 @@ export class EntryHandlerPredicate implements IEntryHandler<boolean> {
if (graphs) {
for (const graph of graphs) {
// Emit our quad if graph @id is known
if (reverse) {
util.validateReverseSubject(object);
parsingContext.emitQuad(depth, util.dataFactory.quad(object, predicate, subject, graph));
} else {
parsingContext.emitQuad(depth, util.dataFactory.quad(subject, predicate, object, graph));
}
util.emitQuadChecked(depth, subject, predicate, object, graph, reverse, isEmbedded);
}
} else {
// Buffer our triple if graph @id is not known yet.
if (reverse) {
util.validateReverseSubject(object);
parsingContext.getUnidentifiedGraphBufferSafe(depthPropertiesGraph - 1).push(
{subject: object, predicate, object: subject});
{subject: object, predicate, object: subject, isEmbedded });
} else {
parsingContext.getUnidentifiedGraphBufferSafe(depthPropertiesGraph - 1)
.push({subject, predicate, object});
.push({subject, predicate, object, isEmbedded});
}
}
} else {
// Emit if no @graph was applicable
const graph = await util.getGraphContainerValue(keys, depthProperties);
if (reverse) {
util.validateReverseSubject(object);
parsingContext.emitQuad(depth, util.dataFactory.quad(object, predicate, subject, graph));
} else {
parsingContext.emitQuad(depth, util.dataFactory.quad(subject, predicate, object, graph));
}
util.emitQuadChecked(depth, subject, predicate, object, graph, reverse, isEmbedded);
}
}
} else {
// Buffer until our @id becomes known, or we go up the stack
if (reverse) {
util.validateReverseSubject(object);
}
parsingContext.getUnidentifiedValueBufferSafe(depthProperties).push({predicate, object, reverse});
parsingContext.getUnidentifiedValueBufferSafe(depthProperties).push({predicate, object, reverse, isEmbedded});
}
}

Expand Down Expand Up @@ -116,7 +108,15 @@ export class EntryHandlerPredicate implements IEntryHandler<boolean> {
const objects = await util.valueToTerm(context, key, value, depth, keys);
if (objects.length) {
for (let object of objects) {
const reverse = Util.isPropertyReverse(context, keyOriginal, await util.unaliasKeywordParent(keys, depth));
let parentKey = await util.unaliasKeywordParent(keys, depth);
const reverse = Util.isPropertyReverse(context, keyOriginal, parentKey);
if (parentKey === '@reverse') {
// Check parent of parent when checking if we're in an embedded node if in @reverse
depth--;
parentKey = await util.unaliasKeywordParent(keys, depth);
}
const isEmbedded = Util.isPropertyInEmbeddedNode(parentKey);
util.validateReverseInEmbeddedNode(key, reverse, isEmbedded);

if (value) {
// Special case if our term was defined as an @list, but does not occur in an array,
Expand All @@ -143,7 +143,7 @@ export class EntryHandlerPredicate implements IEntryHandler<boolean> {
}

await EntryHandlerPredicate.handlePredicateObject(parsingContext, util, keys, depth,
predicate, object, reverse);
predicate, object, reverse, isEmbedded);
}
}
}
Expand Down
12 changes: 11 additions & 1 deletion lib/entryhandler/keyword/EntryHandlerKeywordId.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,17 @@ export class EntryHandlerKeywordId extends EntryHandlerKeyword {
public async handle(parsingContext: ParsingContext, util: Util, key: any, keys: any[], value: any, depth: number)
: Promise<any> {
if (typeof value !== 'string') {
parsingContext.emitError(new ErrorCoded(`Found illegal @id '${value}'`, ERROR_CODES.INVALID_ID_VALUE));
// JSON-LD-star allows @id object values
if (parsingContext.rdfstar && typeof value === 'object') {
const valueKeys = Object.keys(value);
if (valueKeys.length === 1 && valueKeys[0] === '@id') {
parsingContext.emitError(new ErrorCoded(`Invalid embedded node without property with @id ${value['@id']}`,
ERROR_CODES.INVALID_EMBEDDED_NODE))
}
} else {
parsingContext.emitError(new ErrorCoded(`Found illegal @id '${value}'`, ERROR_CODES.INVALID_ID_VALUE));
}
return;
}

// Determine the canonical place for this id.
Expand Down
7 changes: 5 additions & 2 deletions lib/entryhandler/keyword/EntryHandlerKeywordType.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ export class EntryHandlerKeywordType extends EntryHandlerKeyword {
// as it's possible that the @type is used to identify the datatype of a literal, which we ignore here.
const context = await parsingContext.getContext(keys);
const predicate = util.rdfType;
const reverse = Util.isPropertyReverse(context, keyOriginal, await util.unaliasKeywordParent(keys, depth));
const parentKey = await util.unaliasKeywordParent(keys, depth);
const reverse = Util.isPropertyReverse(context, keyOriginal, parentKey);
const isEmbedded = Util.isPropertyInEmbeddedNode(parentKey);
util.validateReverseInEmbeddedNode(key, reverse, isEmbedded);

// Handle multiple values if the value is an array
const elements = Array.isArray(value) ? value : [ value ];
Expand All @@ -37,7 +40,7 @@ export class EntryHandlerKeywordType extends EntryHandlerKeyword {
const type = util.createVocabOrBaseTerm(context, element);
if (type) {
await EntryHandlerPredicate.handlePredicateObject(parsingContext, util, keys, depth,
predicate, type, reverse);
predicate, type, reverse, isEmbedded);
}
}

Expand Down
Loading

0 comments on commit 0f54ecd

Please sign in to comment.