From 10656356155ac107744120d69f2101ea6caba22a Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Wed, 18 Sep 2024 13:34:52 +0200 Subject: [PATCH 1/6] feat: very basic clustering setup --- src/dataflow/cluster.ts | 32 +++++++++++++++++++ .../dataflow/graph/cluster-tests.ts | 30 +++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 src/dataflow/cluster.ts create mode 100644 test/functionality/dataflow/graph/cluster-tests.ts diff --git a/src/dataflow/cluster.ts b/src/dataflow/cluster.ts new file mode 100644 index 0000000000..04e9dbeb3b --- /dev/null +++ b/src/dataflow/cluster.ts @@ -0,0 +1,32 @@ +import type { DataflowGraph } from './graph/graph'; +import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id'; + +export type DataflowGraphClusters = DataflowGraphCluster[]; +export interface DataflowGraphCluster { + readonly startNode: NodeId; + readonly members: readonly NodeId[]; +} + +export function findAllClusters(graph: DataflowGraph): DataflowGraphClusters { + const clusters: DataflowGraphClusters = []; + const notReached = new Set([...graph.vertices(true)].map(([id]) => id)); + while(notReached.size > 0){ + const [startNode] = notReached; + notReached.delete(startNode); + clusters.push({ startNode: startNode, members: [startNode, ...cluster(graph, startNode, notReached)] }); + } + return clusters; +} + +function cluster(graph: DataflowGraph, from: NodeId, notReached: Set): NodeId[] { + const edges: NodeId[] = []; + // TODO do we only need outgoing edges?? help + for(const [to] of graph.outgoingEdges(from) ?? []) { + // TODO just deleting these is insufficient, examples like: edge(0, 1) + edge(1, 0) + if(notReached.delete(to)) { + edges.push(to); + edges.push(...cluster(graph, to, notReached)); + } + } + return edges; +} diff --git a/test/functionality/dataflow/graph/cluster-tests.ts b/test/functionality/dataflow/graph/cluster-tests.ts new file mode 100644 index 0000000000..c905cb6e7c --- /dev/null +++ b/test/functionality/dataflow/graph/cluster-tests.ts @@ -0,0 +1,30 @@ +import type { DataflowGraph } from '../../../../src/dataflow/graph/graph'; +import type { DataflowGraphClusters } from '../../../../src/dataflow/cluster'; +import { findAllClusters } from '../../../../src/dataflow/cluster'; +import { assert } from 'chai'; +import { emptyGraph } from '../../_helper/dataflow/dataflowgraph-builder'; + +describe('Graph Clustering', () => { + describe('Simple', () => { + describe('Positive', () => { + test('empty', emptyGraph(), []); + test('single vertex', emptyGraph().use(0, 'x'), [{ startNode: 0, members: [0] }]); + test('single edge', emptyGraph().use(0, 'x').use(1, 'y').reads(0, 1), [{ startNode: 0, members: [0, 1] }]); + }); + }); +}); + +function test(name: string, graph: DataflowGraph, expected: DataflowGraphClusters): void { + it(name, () => { + const actual = findAllClusters(graph); + assert.equal(actual.length, expected.length, 'Different number of clusters'); + for(let i = 0; i < actual.length; i++) { + // TODO probably allow arbitrary cluster & cluster member ordering between actual and expected + assert.equal(actual[i].startNode, expected[i].startNode, `Start node of cluster ${i} differs`); + assert.equal(actual[i].members.length, expected[i].members.length, `Member amounts of cluster ${i} differ`); + for(let m = 0; m < actual[i].members.length; m++) { + assert.equal(actual[i].members[m], expected[i].members[m], `Member ${m} of cluster ${i} differs`); + } + } + }); +} From ae21f0228571fe402f4a89ec74fc024246949165 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Wed, 18 Sep 2024 13:39:04 +0200 Subject: [PATCH 2/6] test: slight test cleanup --- test/functionality/dataflow/graph/cluster-tests.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/functionality/dataflow/graph/cluster-tests.ts b/test/functionality/dataflow/graph/cluster-tests.ts index c905cb6e7c..fee0ab25a9 100644 --- a/test/functionality/dataflow/graph/cluster-tests.ts +++ b/test/functionality/dataflow/graph/cluster-tests.ts @@ -6,11 +6,12 @@ import { emptyGraph } from '../../_helper/dataflow/dataflowgraph-builder'; describe('Graph Clustering', () => { describe('Simple', () => { - describe('Positive', () => { - test('empty', emptyGraph(), []); - test('single vertex', emptyGraph().use(0, 'x'), [{ startNode: 0, members: [0] }]); - test('single edge', emptyGraph().use(0, 'x').use(1, 'y').reads(0, 1), [{ startNode: 0, members: [0, 1] }]); - }); + test('empty', emptyGraph(), []); + test('single vertex', emptyGraph().use(0, 'x'), [{ startNode: 0, members: [0] }]); + test('single edge', emptyGraph().use(0, 'x').use(1, 'y').reads(0, 1), [{ startNode: 0, members: [0, 1] }]); + test('two single-edge', + emptyGraph().use(0, 'x').use(1, 'y').reads(0, 1).use(2, 'z').use(3, 'w').reads(2, 3), + [{ startNode: 0, members: [0, 1] }, { startNode: 2, members: [2, 3] }]); }); }); From f734a782b2eaa3cf591b0e0073851edc9d0b1758 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 19 Sep 2024 12:43:05 +0200 Subject: [PATCH 3/6] test: allow arbitrary cluster member ordering --- test/functionality/dataflow/graph/cluster-tests.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/test/functionality/dataflow/graph/cluster-tests.ts b/test/functionality/dataflow/graph/cluster-tests.ts index fee0ab25a9..b2caf43f25 100644 --- a/test/functionality/dataflow/graph/cluster-tests.ts +++ b/test/functionality/dataflow/graph/cluster-tests.ts @@ -20,11 +20,9 @@ function test(name: string, graph: DataflowGraph, expected: DataflowGraphCluster const actual = findAllClusters(graph); assert.equal(actual.length, expected.length, 'Different number of clusters'); for(let i = 0; i < actual.length; i++) { - // TODO probably allow arbitrary cluster & cluster member ordering between actual and expected - assert.equal(actual[i].startNode, expected[i].startNode, `Start node of cluster ${i} differs`); - assert.equal(actual[i].members.length, expected[i].members.length, `Member amounts of cluster ${i} differ`); + assert.equal(actual[i].members.length, expected[i].members.length, `Member amounts of cluster differ: ${actual[i].members.toString()} vs ${expected[i].members.toString()}`); for(let m = 0; m < actual[i].members.length; m++) { - assert.equal(actual[i].members[m], expected[i].members[m], `Member ${m} of cluster ${i} differs`); + assert.isTrue(expected[i].members.includes(actual[i].members[m]), `Member ${actual[i].members[m]} of cluster differs`); } } }); From 097d83939ec8fe2d2dd5da4abc4174fa41419e18 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Thu, 19 Sep 2024 13:54:55 +0200 Subject: [PATCH 4/6] test: some failing tests :) --- .github/workflows/qa.yaml | 22 ++--- src/dataflow/cluster.ts | 1 + .../dataflow/graph/cluster-tests.ts | 86 ++++++++++++++++--- 3 files changed, 85 insertions(+), 24 deletions(-) diff --git a/.github/workflows/qa.yaml b/.github/workflows/qa.yaml index 9552cc6eea..c5fb187bb8 100644 --- a/.github/workflows/qa.yaml +++ b/.github/workflows/qa.yaml @@ -1,6 +1,6 @@ name: "QA" # Runs on each push and tests flowR for the default configuration -# Depending on the targets etc. this may perform many more checks! +# Depending on the targets, etc. this may perform many more checks! 'on': push: @@ -102,16 +102,16 @@ jobs: with: node-version: ${{ env.ACTION_NODE_VERSION }} registry-url: "https://registry.npmjs.org/" - + - name: "⬇️ Setup R" uses: r-lib/actions/setup-r@v2 with: r-version: ${{ env.ACTION_R_VERSION }} - + - name: "📦 Install R Packages" shell: Rscript {0} run: install.packages("xmlparsedata", repos="https://cloud.r-project.org/") - + - name: "🧪 Run the Tests" run: bash .github/workflows/scripts/run-flowr-command.sh "test-full -- --forbid-only" @@ -151,16 +151,16 @@ jobs: with: node-version: ${{ env.ACTION_NODE_VERSION }} registry-url: "https://registry.npmjs.org/" - + - name: "⬇️ Setup R" uses: r-lib/actions/setup-r@v2 with: r-version: ${{ env.ACTION_R_VERSION }} - + - name: "📦 Install R Packages" shell: Rscript {0} run: install.packages("xmlparsedata", repos="https://cloud.r-project.org/") - + - name: "⏱️ Run the performance benchmarks" run: bash .github/workflows/scripts/run-flowr-command.sh performance-test -- 1 1 "${{ matrix.name }}" @@ -237,7 +237,7 @@ jobs: gh-repository: ${{ github.repository }} benchmark-data-dir-path: wiki/stats/benchmark/ auto-push: false - + deploy-doc: needs: [ test, performance-test ] name: "🚀 Build and Deploy Documentation (only on main)" @@ -264,16 +264,16 @@ jobs: with: node-version: ${{ env.ACTION_NODE_VERSION }} registry-url: "https://registry.npmjs.org/" - + - name: "⬇️ Setup R" uses: r-lib/actions/setup-r@v2 with: r-version: ${{ env.ACTION_R_VERSION }} - + - name: "📦 Install R Packages" shell: Rscript {0} run: install.packages("xmlparsedata", repos="https://cloud.r-project.org/") - + - name: "🛠️ Build the documentation" run: bash .github/workflows/scripts/run-flowr-command.sh doc diff --git a/src/dataflow/cluster.ts b/src/dataflow/cluster.ts index 04e9dbeb3b..a594146275 100644 --- a/src/dataflow/cluster.ts +++ b/src/dataflow/cluster.ts @@ -10,6 +10,7 @@ export interface DataflowGraphCluster { export function findAllClusters(graph: DataflowGraph): DataflowGraphClusters { const clusters: DataflowGraphClusters = []; const notReached = new Set([...graph.vertices(true)].map(([id]) => id)); + /* TODO: probably it is best to start from back to front ? */ while(notReached.size > 0){ const [startNode] = notReached; notReached.delete(startNode); diff --git a/test/functionality/dataflow/graph/cluster-tests.ts b/test/functionality/dataflow/graph/cluster-tests.ts index b2caf43f25..8a70abda30 100644 --- a/test/functionality/dataflow/graph/cluster-tests.ts +++ b/test/functionality/dataflow/graph/cluster-tests.ts @@ -3,9 +3,30 @@ import type { DataflowGraphClusters } from '../../../../src/dataflow/cluster'; import { findAllClusters } from '../../../../src/dataflow/cluster'; import { assert } from 'chai'; import { emptyGraph } from '../../_helper/dataflow/dataflowgraph-builder'; +import type { SingleSlicingCriterion } from '../../../../src/slicing/criterion/parse'; +import { PipelineExecutor } from '../../../../src/core/pipeline-executor'; +import { DEFAULT_DATAFLOW_PIPELINE } from '../../../../src/core/steps/pipeline/default-pipelines'; +import { requestFromInput } from '../../../../src/r-bridge/retriever'; +import { deterministicCountingIdGenerator } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; +import { withShell } from '../../_helper/shell'; +import { slicingCriterionToId } from '../../../../src/slicing/criterion/parse'; +import type { NodeId } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/node-id'; describe('Graph Clustering', () => { - describe('Simple', () => { + describe('Simple Graph Tests', () => { + function test(name: string, graph: DataflowGraph, expected: DataflowGraphClusters): void { + it(name, () => { + const actual = findAllClusters(graph); + assert.equal(actual.length, expected.length, 'Different number of clusters'); + for(let i = 0; i < actual.length; i++) { + assert.equal(actual[i].members.length, expected[i].members.length, `Member amounts of cluster differ: ${actual[i].members.toString()} vs ${expected[i].members.toString()}`); + for(let m = 0; m < actual[i].members.length; m++) { + assert.isTrue(expected[i].members.includes(actual[i].members[m]), `Member ${actual[i].members[m]} of cluster differs`); + } + } + }); + } + test('empty', emptyGraph(), []); test('single vertex', emptyGraph().use(0, 'x'), [{ startNode: 0, members: [0] }]); test('single edge', emptyGraph().use(0, 'x').use(1, 'y').reads(0, 1), [{ startNode: 0, members: [0, 1] }]); @@ -13,17 +34,56 @@ describe('Graph Clustering', () => { emptyGraph().use(0, 'x').use(1, 'y').reads(0, 1).use(2, 'z').use(3, 'w').reads(2, 3), [{ startNode: 0, members: [0, 1] }, { startNode: 2, members: [2, 3] }]); }); -}); -function test(name: string, graph: DataflowGraph, expected: DataflowGraphClusters): void { - it(name, () => { - const actual = findAllClusters(graph); - assert.equal(actual.length, expected.length, 'Different number of clusters'); - for(let i = 0; i < actual.length; i++) { - assert.equal(actual[i].members.length, expected[i].members.length, `Member amounts of cluster differ: ${actual[i].members.toString()} vs ${expected[i].members.toString()}`); - for(let m = 0; m < actual[i].members.length; m++) { - assert.isTrue(expected[i].members.includes(actual[i].members[m]), `Member ${actual[i].members[m]} of cluster differs`); - } + function compareIds(a: NodeId | undefined, b: NodeId | undefined): number { + return String(a ?? '').localeCompare(String(b ?? '')); + } + + function normalizeClusters(clusters: DataflowGraphClusters) { + /* sort order and the order members */ + return clusters.map(c => ({ + startNode: c.startNode, + members: [...c.members].sort(compareIds) + })).sort((a, b) => compareIds(a.members[0], b.members[0])); + } + + describe('Code Snippets', withShell(shell => { + function test(name: string, code: string, clusters: readonly SingleSlicingCriterion[][]) { + it(name, async() => { + const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput(code), + getId: deterministicCountingIdGenerator(0) + }).allRemainingSteps(); + + const graph = info.dataflow.graph; + + // resolve all criteria + const resolved = normalizeClusters(clusters.map(c => ({ + startNode: '', + members: c.map(s => slicingCriterionToId(s, graph.idMap ?? info.normalize.idMap)) + }))); + + const actual = normalizeClusters(findAllClusters(graph)); + assert.equal(actual.length, resolved.length, `Different number of clusters: ${JSON.stringify(actual)} vs. wanted: ${JSON.stringify(resolved)}`); + for(let i = 0; i < actual.length; i++) { + assert.equal(actual[i].members.length, resolved[i].members.length, `Member amounts of cluster differ: ${JSON.stringify(actual[i].members)} vs ${JSON.stringify(resolved[i])}`); + for(let m = 0; m < actual[i].members.length; m++) { + assert.isTrue(resolved[i].members.includes(actual[i].members[m]), `Member ${actual[i].members[m]} of cluster differs`); + } + } + }); } - }); -} + + test('assignment', 'x <- 3', [['1:1', '1:3', '1:6']]); + test('two independent assignments', 'x <- 3\ny <- 4', [['1:1', '1:3', '1:6'], ['2:1', '2:3', '2:6']]); + test('with a print call', 'x <- 3\nprint(x)', [['1:1', '1:3', '1:6', '2:1', '2:7']]); + test('late join of clusters', 'x <- 3\ny <- 4\nprint(x + y)', [['1:1', '1:3', '1:6', '2:1', '2:3', '2:6', '3:1', '3:7', '3:9', '3:11']]); + test('contain call target', 'y <- 42\nf <- function(x) { x * y }\nf(2)\nf(3)', [['1:1', '1:3', '1:6', '2:1', '2:3', '2:6', '2:15', '2:18', '2:20', '2:22', '2:24', '3:1', '3:3', '4:1', '4:3']]); + test('some odd ducklings', 'y <- 42\nz <- 5\nf <- function(x) { x * y }\nf(2)\nprint(z)\nf(3)\nu', [ + ['1:1', '1:3', '1:6', '3:1', '3:3', '3:6', '3:15', '3:18', '3:20', '3:22', '3:24', '3:1', '3:3', '6:1', '6:3'], /* call as before */ + ['2:1', '2:3', '2:6', '5:1', '5:7'], /* print & z */ + ['7:1'] /* u */ + ]); + })); +}); From 61015dcdc915977a36e4bbc7ff61d95e3762aca1 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 19 Sep 2024 14:49:20 +0200 Subject: [PATCH 5/6] test: clean up cluster comparisons --- .../dataflow/graph/cluster-tests.ts | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/test/functionality/dataflow/graph/cluster-tests.ts b/test/functionality/dataflow/graph/cluster-tests.ts index 8a70abda30..67f4452aa3 100644 --- a/test/functionality/dataflow/graph/cluster-tests.ts +++ b/test/functionality/dataflow/graph/cluster-tests.ts @@ -15,16 +15,7 @@ import type { NodeId } from '../../../../src/r-bridge/lang-4.x/ast/model/process describe('Graph Clustering', () => { describe('Simple Graph Tests', () => { function test(name: string, graph: DataflowGraph, expected: DataflowGraphClusters): void { - it(name, () => { - const actual = findAllClusters(graph); - assert.equal(actual.length, expected.length, 'Different number of clusters'); - for(let i = 0; i < actual.length; i++) { - assert.equal(actual[i].members.length, expected[i].members.length, `Member amounts of cluster differ: ${actual[i].members.toString()} vs ${expected[i].members.toString()}`); - for(let m = 0; m < actual[i].members.length; m++) { - assert.isTrue(expected[i].members.includes(actual[i].members[m]), `Member ${actual[i].members[m]} of cluster differs`); - } - } - }); + it(name, () => compareClusters(findAllClusters(graph), expected)); } test('empty', emptyGraph(), []); @@ -63,15 +54,8 @@ describe('Graph Clustering', () => { startNode: '', members: c.map(s => slicingCriterionToId(s, graph.idMap ?? info.normalize.idMap)) }))); - const actual = normalizeClusters(findAllClusters(graph)); - assert.equal(actual.length, resolved.length, `Different number of clusters: ${JSON.stringify(actual)} vs. wanted: ${JSON.stringify(resolved)}`); - for(let i = 0; i < actual.length; i++) { - assert.equal(actual[i].members.length, resolved[i].members.length, `Member amounts of cluster differ: ${JSON.stringify(actual[i].members)} vs ${JSON.stringify(resolved[i])}`); - for(let m = 0; m < actual[i].members.length; m++) { - assert.isTrue(resolved[i].members.includes(actual[i].members[m]), `Member ${actual[i].members[m]} of cluster differs`); - } - } + compareClusters(actual, resolved); }); } @@ -87,3 +71,13 @@ describe('Graph Clustering', () => { ]); })); }); + +function compareClusters(actual: DataflowGraphClusters, expected: DataflowGraphClusters): void { + assert.equal(actual.length, expected.length, `Different number of clusters: ${JSON.stringify(actual)} vs. wanted: ${JSON.stringify(expected)}`); + for(let i = 0; i < actual.length; i++) { + assert.equal(actual[i].members.length, expected[i].members.length, `Member amounts of cluster differ: ${actual[i].members.toString()} vs ${expected[i].members.toString()}`); + for(let m = 0; m < actual[i].members.length; m++) { + assert.equal(actual[i].members[m], expected[i].members[m], `Member ${actual[i].members[m]} of cluster ${i} differs`); + } + } +} From 1267019fcc3011c6f0801b68abb168181bfad558 Mon Sep 17 00:00:00 2001 From: Ellpeck Date: Thu, 19 Sep 2024 14:51:55 +0200 Subject: [PATCH 6/6] test: normalize clusters when comparing --- .../dataflow/graph/cluster-tests.ts | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/test/functionality/dataflow/graph/cluster-tests.ts b/test/functionality/dataflow/graph/cluster-tests.ts index 67f4452aa3..0f25aa1751 100644 --- a/test/functionality/dataflow/graph/cluster-tests.ts +++ b/test/functionality/dataflow/graph/cluster-tests.ts @@ -26,20 +26,8 @@ describe('Graph Clustering', () => { [{ startNode: 0, members: [0, 1] }, { startNode: 2, members: [2, 3] }]); }); - function compareIds(a: NodeId | undefined, b: NodeId | undefined): number { - return String(a ?? '').localeCompare(String(b ?? '')); - } - - function normalizeClusters(clusters: DataflowGraphClusters) { - /* sort order and the order members */ - return clusters.map(c => ({ - startNode: c.startNode, - members: [...c.members].sort(compareIds) - })).sort((a, b) => compareIds(a.members[0], b.members[0])); - } - describe('Code Snippets', withShell(shell => { - function test(name: string, code: string, clusters: readonly SingleSlicingCriterion[][]) { + function test(name: string, code: string, clusters: readonly SingleSlicingCriterion[][]): void { it(name, async() => { const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, @@ -50,11 +38,11 @@ describe('Graph Clustering', () => { const graph = info.dataflow.graph; // resolve all criteria - const resolved = normalizeClusters(clusters.map(c => ({ + const resolved = clusters.map(c => ({ startNode: '', members: c.map(s => slicingCriterionToId(s, graph.idMap ?? info.normalize.idMap)) - }))); - const actual = normalizeClusters(findAllClusters(graph)); + })); + const actual = findAllClusters(graph); compareClusters(actual, resolved); }); } @@ -73,6 +61,9 @@ describe('Graph Clustering', () => { }); function compareClusters(actual: DataflowGraphClusters, expected: DataflowGraphClusters): void { + actual = normalizeClusters(actual); + expected = normalizeClusters(expected); + assert.equal(actual.length, expected.length, `Different number of clusters: ${JSON.stringify(actual)} vs. wanted: ${JSON.stringify(expected)}`); for(let i = 0; i < actual.length; i++) { assert.equal(actual[i].members.length, expected[i].members.length, `Member amounts of cluster differ: ${actual[i].members.toString()} vs ${expected[i].members.toString()}`); @@ -80,4 +71,16 @@ function compareClusters(actual: DataflowGraphClusters, expected: DataflowGraphC assert.equal(actual[i].members[m], expected[i].members[m], `Member ${actual[i].members[m]} of cluster ${i} differs`); } } + + function compareIds(a: NodeId | undefined, b: NodeId | undefined): number { + return String(a ?? '').localeCompare(String(b ?? '')); + } + + function normalizeClusters(clusters: DataflowGraphClusters): DataflowGraphClusters { + /* sort order and the order members */ + return clusters.map(c => ({ + startNode: c.startNode, + members: [...c.members].sort(compareIds) + })).sort((a, b) => compareIds(a.members[0], b.members[0])); + } }