diff --git a/src/rdf.test.ts b/src/rdf.test.ts
index 3c6bb24493..ab399e9a58 100644
--- a/src/rdf.test.ts
+++ b/src/rdf.test.ts
@@ -587,7 +587,7 @@ describe("fromRdfJsDataset", () => {
);
});
- it("can parse chained Blank Nodes with a single lnk that end in a dangling Blank Node", () => {
+ it("can parse chained Blank Nodes with a single link that end in a dangling Blank Node", () => {
const mockDataset: ImmutableDataset = {
type: "Dataset",
graphs: { default: {} },
diff --git a/src/rdfjs.internal.ts b/src/rdfjs.internal.ts
index e9e08d5022..4606fc4477 100644
--- a/src/rdfjs.internal.ts
+++ b/src/rdfjs.internal.ts
@@ -502,9 +502,11 @@ function getCycleBlankNodes(
)
.map((quad) => quad.object as RdfJs.BlankNode);
- // If no Blank Nodes are connected to `currentNode`, we're done:
+ // If no Blank Nodes are connected to `currentNode`, and `currentNode` is not
+ // part of a cycle, we're done; the currently traversed Nodes do not form a
+ // cycle:
if (blankNodeObjects.length === 0) {
- return traversedBlankNodes;
+ return [];
}
// Store that we've traversed `currentNode`, then move on to all the Blank
diff --git a/src/resource/solidDataset.test.ts b/src/resource/solidDataset.test.ts
index 33d5220309..0727899ef0 100644
--- a/src/resource/solidDataset.test.ts
+++ b/src/resource/solidDataset.test.ts
@@ -167,6 +167,155 @@ describe("responseToSolidDataset", () => {
});
});
+ it("does not include non-deterministic identifiers when it detects non-cyclic chains of Blank Nodes", async () => {
+ const turtle = `
+ @prefix : <#>.
+ @prefix foaf: .
+ @prefix vcard: .
+ @prefix acl: .
+
+ <> a foaf:PersonalProfileDocument; foaf:maker :me; foaf:primaryTopic :me.
+
+ :me
+ a foaf:Person;
+ vcard:fn "Vincent";
+ acl:trustedApp
+ [
+ acl:mode acl:Append, acl:Control, acl:Read, acl:Write;
+ acl:origin
+ ],
+ [
+ acl:mode acl:Append, acl:Control, acl:Read, acl:Write;
+ acl:origin
+ ].
+ `;
+
+ const response = new Response(turtle, {
+ headers: {
+ "Content-Type": "text/turtle",
+ },
+ });
+ jest
+ .spyOn(response, "url", "get")
+ .mockReturnValue("https://some.pod/resource");
+ const solidDataset = await responseToSolidDataset(response);
+
+ expect(solidDataset).toStrictEqual({
+ graphs: {
+ default: {
+ "https://some.pod/resource": {
+ predicates: {
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": {
+ namedNodes: [
+ "http://xmlns.com/foaf/0.1/PersonalProfileDocument",
+ ],
+ },
+ "http://xmlns.com/foaf/0.1/maker": {
+ namedNodes: ["https://some.pod/resource#me"],
+ },
+ "http://xmlns.com/foaf/0.1/primaryTopic": {
+ namedNodes: ["https://some.pod/resource#me"],
+ },
+ },
+ type: "Subject",
+ url: "https://some.pod/resource",
+ },
+ "https://some.pod/resource#me": {
+ predicates: {
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": {
+ namedNodes: ["http://xmlns.com/foaf/0.1/Person"],
+ },
+ "http://www.w3.org/2006/vcard/ns#fn": {
+ literals: {
+ "http://www.w3.org/2001/XMLSchema#string": ["Vincent"],
+ },
+ },
+ "http://www.w3.org/ns/auth/acl#trustedApp": {
+ blankNodes: [
+ {
+ "http://www.w3.org/ns/auth/acl#mode": {
+ namedNodes: [
+ "http://www.w3.org/ns/auth/acl#Append",
+ "http://www.w3.org/ns/auth/acl#Control",
+ "http://www.w3.org/ns/auth/acl#Read",
+ "http://www.w3.org/ns/auth/acl#Write",
+ ],
+ },
+ "http://www.w3.org/ns/auth/acl#origin": {
+ namedNodes: ["http://localhost:3000"],
+ },
+ },
+ {
+ "http://www.w3.org/ns/auth/acl#mode": {
+ namedNodes: [
+ "http://www.w3.org/ns/auth/acl#Append",
+ "http://www.w3.org/ns/auth/acl#Control",
+ "http://www.w3.org/ns/auth/acl#Read",
+ "http://www.w3.org/ns/auth/acl#Write",
+ ],
+ },
+ "http://www.w3.org/ns/auth/acl#origin": {
+ namedNodes: ["https://penny.vincenttunru.com"],
+ },
+ },
+ ],
+ },
+ },
+ type: "Subject",
+ url: "https://some.pod/resource#me",
+ },
+ },
+ },
+ internal_resourceInfo: {
+ contentType: "text/turtle",
+ isRawData: false,
+ linkedResources: {},
+ sourceIri: "https://some.pod/resource",
+ },
+ type: "Dataset",
+ });
+ });
+
+ it("does not attempt to detect chains when there are many Blank Nodes, to avoid performance bottlenecks", async () => {
+ function getChainedBlankNode(iteration: number): string {
+ if (iteration === 1000) {
+ return ` "Base case"`;
+ }
+ return ` [${getChainedBlankNode(
+ iteration + 1
+ )}]`;
+ }
+ const turtle = `
+ @prefix : <#>.
+ @prefix vcard: .
+
+ :me vcard:fn [${getChainedBlankNode(0)}].
+ `;
+
+ const response = new Response(turtle, {
+ headers: {
+ "Content-Type": "text/turtle",
+ },
+ });
+ jest
+ .spyOn(response, "url", "get")
+ .mockReturnValue("https://some.pod/resource");
+
+ const t0 = performance.now();
+ const solidDataset = await responseToSolidDataset(response);
+ const t1 = performance.now();
+
+ // Parsing a document with over 1000 statements will always be somewhat slow
+ // (hence allowing it to take a second), but if it attempts to detect
+ // chains, it will take on the order of >20 seconds.
+ expect(t1 - t0).toBeLessThan(1000);
+ // Blank Nodes should be listed explicitly, rather than as properties on
+ // https://some.pod/resource#me:
+ expect(Object.keys(solidDataset.graphs.default)).not.toStrictEqual([
+ "https://some.pod/resource#me",
+ ]);
+ });
+
it("throws a meaningful error when the server returned a 403", async () => {
const response = new Response("Not allowed", { status: 403 });
jest
diff --git a/src/resource/solidDataset.ts b/src/resource/solidDataset.ts
index 70caa52f76..7d3c9a37cf 100644
--- a/src/resource/solidDataset.ts
+++ b/src/resource/solidDataset.ts
@@ -233,11 +233,19 @@ export async function responseToSolidDataset(
solidDataset = addRdfJsQuadToDataset(solidDataset, quad);
}
});
- parser.onComplete(() => {
+ parser.onComplete(async () => {
+ // If a Resource contains more than this number of Blank Nodes,
+ // we consider the detection of chains (O(n^2), I think) to be too
+ // expensive, and just incorporate them as regular Blank Nodes with
+ // non-deterministic, ad-hoc identifiers into the SolidDataset:
+ const maxBlankNodesToDetectChainsFor = 20;
// Some Blank Nodes only serve to use a set of Quads as the Object for a
// single Subject. Those Quads will be added to the SolidDataset when
// their Subject's Blank Node is encountered in the Object position.
- const chainBlankNodes = getChainBlankNodes(quadsWithBlankNodes);
+ const chainBlankNodes =
+ quadsWithBlankNodes.length <= maxBlankNodesToDetectChainsFor
+ ? getChainBlankNodes(quadsWithBlankNodes)
+ : [];
const quadsWithoutChainBlankNodeSubjects = quadsWithBlankNodes.filter(
(quad) =>
chainBlankNodes.every(