diff --git a/src/rdf.test.ts b/src/rdf.test.ts index 3c6bb24493..ab399e9a58 100644 --- a/src/rdf.test.ts +++ b/src/rdf.test.ts @@ -587,7 +587,7 @@ describe("fromRdfJsDataset", () => { ); }); - it("can parse chained Blank Nodes with a single lnk that end in a dangling Blank Node", () => { + it("can parse chained Blank Nodes with a single link that end in a dangling Blank Node", () => { const mockDataset: ImmutableDataset = { type: "Dataset", graphs: { default: {} }, diff --git a/src/rdfjs.internal.ts b/src/rdfjs.internal.ts index e9e08d5022..4606fc4477 100644 --- a/src/rdfjs.internal.ts +++ b/src/rdfjs.internal.ts @@ -502,9 +502,11 @@ function getCycleBlankNodes( ) .map((quad) => quad.object as RdfJs.BlankNode); - // If no Blank Nodes are connected to `currentNode`, we're done: + // If no Blank Nodes are connected to `currentNode` as objects (i.e. + // `blankNodeObjects` is empty), the traversal ends here without closing a + // cycle, so return an empty list rather than the Nodes traversed so far: if (blankNodeObjects.length === 0) { - return traversedBlankNodes; + return []; } // Store that we've traversed `currentNode`, then move on to all the Blank diff --git a/src/resource/solidDataset.test.ts b/src/resource/solidDataset.test.ts index 33d5220309..0727899ef0 100644 --- a/src/resource/solidDataset.test.ts +++ b/src/resource/solidDataset.test.ts @@ -167,6 +167,155 @@ describe("responseToSolidDataset", () => { }); }); + it("does not include non-deterministic identifiers when it detects non-cyclic chains of Blank Nodes", async () => { + const turtle = ` + @prefix : <#>. + @prefix foaf: . + @prefix vcard: . + @prefix acl: . + + <> a foaf:PersonalProfileDocument; foaf:maker :me; foaf:primaryTopic :me. + + :me + a foaf:Person; + vcard:fn "Vincent"; + acl:trustedApp + [ + acl:mode acl:Append, acl:Control, acl:Read, acl:Write; + acl:origin + ], + [ + acl:mode acl:Append, acl:Control, acl:Read, acl:Write; + acl:origin + ]. 
+ `; + + const response = new Response(turtle, { + headers: { + "Content-Type": "text/turtle", + }, + }); + jest + .spyOn(response, "url", "get") + .mockReturnValue("https://some.pod/resource"); + const solidDataset = await responseToSolidDataset(response); + + expect(solidDataset).toStrictEqual({ + graphs: { + default: { + "https://some.pod/resource": { + predicates: { + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": { + namedNodes: [ + "http://xmlns.com/foaf/0.1/PersonalProfileDocument", + ], + }, + "http://xmlns.com/foaf/0.1/maker": { + namedNodes: ["https://some.pod/resource#me"], + }, + "http://xmlns.com/foaf/0.1/primaryTopic": { + namedNodes: ["https://some.pod/resource#me"], + }, + }, + type: "Subject", + url: "https://some.pod/resource", + }, + "https://some.pod/resource#me": { + predicates: { + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": { + namedNodes: ["http://xmlns.com/foaf/0.1/Person"], + }, + "http://www.w3.org/2006/vcard/ns#fn": { + literals: { + "http://www.w3.org/2001/XMLSchema#string": ["Vincent"], + }, + }, + "http://www.w3.org/ns/auth/acl#trustedApp": { + blankNodes: [ + { + "http://www.w3.org/ns/auth/acl#mode": { + namedNodes: [ + "http://www.w3.org/ns/auth/acl#Append", + "http://www.w3.org/ns/auth/acl#Control", + "http://www.w3.org/ns/auth/acl#Read", + "http://www.w3.org/ns/auth/acl#Write", + ], + }, + "http://www.w3.org/ns/auth/acl#origin": { + namedNodes: ["http://localhost:3000"], + }, + }, + { + "http://www.w3.org/ns/auth/acl#mode": { + namedNodes: [ + "http://www.w3.org/ns/auth/acl#Append", + "http://www.w3.org/ns/auth/acl#Control", + "http://www.w3.org/ns/auth/acl#Read", + "http://www.w3.org/ns/auth/acl#Write", + ], + }, + "http://www.w3.org/ns/auth/acl#origin": { + namedNodes: ["https://penny.vincenttunru.com"], + }, + }, + ], + }, + }, + type: "Subject", + url: "https://some.pod/resource#me", + }, + }, + }, + internal_resourceInfo: { + contentType: "text/turtle", + isRawData: false, + linkedResources: {}, + sourceIri: 
"https://some.pod/resource", + }, + type: "Dataset", + }); + }); + + it("does not attempt to detect chains when there are many Blank Nodes, to avoid performance bottlenecks", async () => { + function getChainedBlankNode(iteration: number): string { + if (iteration === 1000) { + return ` "Base case"`; + } + return ` [${getChainedBlankNode( + iteration + 1 + )}]`; + } + const turtle = ` + @prefix : <#>. + @prefix vcard: . + + :me vcard:fn [${getChainedBlankNode(0)}]. + `; + + const response = new Response(turtle, { + headers: { + "Content-Type": "text/turtle", + }, + }); + jest + .spyOn(response, "url", "get") + .mockReturnValue("https://some.pod/resource"); + + const t0 = performance.now(); + const solidDataset = await responseToSolidDataset(response); + const t1 = performance.now(); + + // Parsing a document with over 1000 statements will always be somewhat slow + // (hence allowing it to take a second), but if it attempts to detect + // chains, it will take on the order of >20 seconds. 
+ expect(t1 - t0).toBeLessThan(1000); + // Blank Nodes should be listed explicitly, rather than as properties on + // https://some.pod/resource#me: + expect(Object.keys(solidDataset.graphs.default)).not.toStrictEqual([ + "https://some.pod/resource#me", + ]); + }); + it("throws a meaningful error when the server returned a 403", async () => { const response = new Response("Not allowed", { status: 403 }); jest diff --git a/src/resource/solidDataset.ts b/src/resource/solidDataset.ts index 70caa52f76..7d3c9a37cf 100644 --- a/src/resource/solidDataset.ts +++ b/src/resource/solidDataset.ts @@ -233,11 +233,19 @@ export async function responseToSolidDataset( solidDataset = addRdfJsQuadToDataset(solidDataset, quad); } }); - parser.onComplete(() => { + parser.onComplete(async () => { + // If `quadsWithBlankNodes` holds more than this number of entries (note: + // this counts Quads, not distinct Blank Nodes), we consider the detection + // of chains (O(n^2), I think) to be too expensive, and just incorporate them + // as regular Blank Nodes with non-deterministic, ad-hoc identifiers instead: + const maxBlankNodesToDetectChainsFor = 20; // Some Blank Nodes only serve to use a set of Quads as the Object for a // single Subject. Those Quads will be added to the SolidDataset when // their Subject's Blank Node is encountered in the Object position. - const chainBlankNodes = getChainBlankNodes(quadsWithBlankNodes); + const chainBlankNodes = + quadsWithBlankNodes.length <= maxBlankNodesToDetectChainsFor + ? getChainBlankNodes(quadsWithBlankNodes) + : []; const quadsWithoutChainBlankNodeSubjects = quadsWithBlankNodes.filter( (quad) => chainBlankNodes.every(