-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add additional ":lineage" command and message (#915)
* feat: first attempt at the new getLineage command
* feat: walk additional edges for determining the lineage
* feat: store lineage IDs in a set
* feat: prevent infinite recursion when determining the lineage IDs
* feat-fix: fixed lineage network request
* feat-fix: get the dfg and ast from the pipeline in the correct way
* doc: added lineage request doc
* feat-fix: add filetoken to error messages
  Co-authored-by: Florian Sihler <florian.sihler@uni-ulm.de>
* feat-fix: updated comment for better clarity
* feat-fix: more guards, more better
* feat-fix: clearer function interface
* feat-fix: removed unused import
  No I didn't push with --no-verify
* feat-fix: removed console.log
  Co-authored-by: Florian Sihler <florian.sihler@uni-ulm.de>
* feat-fix: criteria resolve now only requires an idmap
* test(lineage): basic test setup
* lint-fix: handle linter issues and empty test labels
* refactor(lineage): remove redundant promise
* doc(lineage): refine wiki entries

---------

Co-authored-by: Florian Sihler <florian.sihler@uni-ulm.de>
- Loading branch information
1 parent
7e27718
commit 429eef3
Showing 9 changed files with 270 additions and 18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import type { ReplCommand } from './main' | ||
import { PipelineExecutor } from '../../../core/pipeline-executor' | ||
import { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default-pipelines' | ||
import type { RShell } from '../../../r-bridge/shell' | ||
import { requestFromInput } from '../../../r-bridge/retriever' | ||
import type { SingleSlicingCriterion } from '../../../slicing/criterion/parse' | ||
import { slicingCriterionToId } from '../../../slicing/criterion/parse' | ||
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id' | ||
import type { OutgoingEdges } from '../../../dataflow/graph/graph' | ||
import type { DataflowGraphEdge } from '../../../dataflow/graph/edge' | ||
import { edgeIncludesType, EdgeType } from '../../../dataflow/graph/edge' | ||
import type { DataflowInformation } from '../../../dataflow/info' | ||
import type { NormalizedAst } from '../../../r-bridge/lang-4.x/ast/model/processing/decorate' | ||
import { guard } from '../../../util/assert' | ||
|
||
/**
 * Splits `str` into the part before `idx` and the part starting at `idx`.
 *
 * A negative `idx` (such as the `-1` that `indexOf` returns when nothing is found) is treated as
 * "there is no split point": the whole string becomes the first part and the second part is empty.
 * Without this check, `slice` would count a negative index from the end of the string
 * and silently cut off the last character.
 *
 * @param str - The string to split
 * @param idx - The index at which the second part starts
 * @returns The pair `[head, tail]`, for which `head + tail === str` always holds
 */
function splitAt(str: string, idx: number): [string, string] {
	if(idx < 0) {
		return [str, '']
	}
	return [str.slice(0, idx), str.slice(idx)]
}
|
||
/**
 * Runs the {@link DEFAULT_DATAFLOW_PIPELINE} on the given input using the given shell.
 *
 * @param shell         - The shell handed to the pipeline executor
 * @param remainingLine - The input to analyze; it is trimmed before it is turned into a request
 * @returns Whatever `allRemainingSteps()` of the executor resolves to
 *          (the REPL command in this file reads `dataflow` and `normalize` from it)
 */
async function getDfg(shell: RShell, remainingLine: string) {
	const request = requestFromInput(remainingLine.trim())
	const executor = new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, request })
	return await executor.allRemainingSteps()
}
|
||
/**
 * Decides whether an edge is followed when collecting the lineage.
 *
 * The edge's types are checked with {@link edgeIncludesType} against the combination of
 * `DefinedBy`, `DefinedByOnCall`, `Returns`, and `Reads`
 * (presumably true if the edge carries at least one of them; see `edgeIncludesType` for the exact semantics).
 */
function filterRelevantEdges(edge: DataflowGraphEdge) {
	const lineageEdgeTypes = EdgeType.DefinedBy | EdgeType.DefinedByOnCall | EdgeType.Returns | EdgeType.Reads
	return edgeIncludesType(lineageEdgeTypes, edge.types)
}
|
||
/**
 * Appends every outgoing edge accepted by {@link filterRelevantEdges} to the end of `queue`,
 * keeping the iteration order of `outgoingEdges`.
 *
 * The edges are pushed one by one instead of being spread into a single `push(...)` call:
 * spreading passes every element as a separate call argument, which fails with a `RangeError`
 * once a vertex has more edges than the engine allows arguments, and it needs two temporary arrays.
 *
 * @param queue         - The work list to extend (modified in place)
 * @param outgoingEdges - The outgoing edges of one vertex, iterated as `[target, edge]` pairs
 */
function pushRelevantEdges(queue: [NodeId, DataflowGraphEdge][], outgoingEdges: OutgoingEdges) {
	for(const entry of outgoingEdges) {
		if(filterRelevantEdges(entry[1])) {
			queue.push(entry)
		}
	}
}
|
||
/**
 * Get the lineage of a node in the dataflow graph.
 *
 * Starting at the vertex selected by the criterion, this repeatedly follows all outgoing edges
 * accepted by `pushRelevantEdges` and collects the id of every vertex reached on the way.
 * Ids that were already collected are not expanded again, so cycles in the graph do not cause endless loops.
 *
 * @param criterion - The criterion to get the lineage of
 * @param ast       - The normalized AST (only its `idMap` is used, to resolve the criterion to a node id)
 * @param dfg       - The dataflow information containing the graph to walk
 * @returns The lineage of the node represented as a set of node ids (it always contains the start vertex itself)
 * @throws If the id the criterion resolves to is not part of the dataflow graph (see the `guard` below)
 */
export function getLineage(criterion: SingleSlicingCriterion, { idMap } : NormalizedAst, dfg: DataflowInformation): Set<NodeId> {
	const src = dfg.graph.get(slicingCriterionToId(criterion, idMap))
	guard(src !== undefined, 'The ID pointed to by the criterion does not exist in the dataflow graph')
	const [vertex, startEdges] = src
	const result: Set<NodeId> = new Set([vertex.id])
	const edgeQueue: [NodeId, DataflowGraphEdge][] = []
	pushRelevantEdges(edgeQueue, startEdges)

	// Array iteration also visits elements that are appended while the loop is running, so this processes
	// the queue in insertion order without `shift()`, which would have to re-index the array on every step.
	for(const [target] of edgeQueue) {
		if(result.has(target)) {
			continue
		}

		result.add(target)

		const targetEdges = dfg.graph.outgoingEdges(target)
		if(targetEdges !== undefined) {
			pushRelevantEdges(edgeQueue, targetEdges)
		}
	}

	return result
}
|
||
/**
 * REPL command `:lineage` (alias `lin`).
 *
 * The remaining line is expected to look like `<criterion> <R code>`: everything before the first space
 * is used as the slicing criterion and the rest is analyzed as R code.
 * The ids of the resulting lineage are written to the standard output, one per line.
 */
export const getLineageCommand: ReplCommand = {
	description:  'Get the lineage of an R object',
	usageExample: ':lineage',
	aliases:      ['lin'],
	script:       false,
	fn:           async(output, shell, remainingLine) => {
		// NOTE(review): without any space in the line, `indexOf` yields -1 and the result depends on how `splitAt` treats a negative index
		const separatorIdx = remainingLine.indexOf(' ')
		const [criterion, code] = splitAt(remainingLine, separatorIdx)
		const analysis = await getDfg(shell, code)
		// the criterion is not validated here, it is passed on as typed by the user
		const lineageIds = getLineage(criterion as SingleSlicingCriterion, analysis.normalize, analysis.dataflow)
		output.stdout(Array.from(lineageIds).join('\n'))
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import type { SingleSlicingCriterion } from '../../../../slicing/criterion/parse' | ||
import type { IdMessageBase, MessageDefinition } from './messages' | ||
import type { NodeId } from '../../../../r-bridge/lang-4.x/ast/model/processing/node-id' | ||
import Joi from 'joi' | ||
|
||
/**
 * Message of type `request-lineage`, asking for the lineage of a single slicing criterion
 * within the file/data identified by a file token.
 */
export interface LineageRequestMessage extends IdMessageBase {
	type:      'request-lineage',
	/** The {@link FileAnalysisRequestMessage#filetoken} of the file/data */
	filetoken: string,
	/** The criterion to start the lineage from */
	criterion: SingleSlicingCriterion,
}
|
||
/**
 * Message definition for {@link LineageRequestMessage}: the message type together with the Joi schema
 * describing its fields (`type`, `filetoken`, and `criterion` are required strings, `id` is an optional string).
 */
export const requestLineageMessage: MessageDefinition<LineageRequestMessage> = {
	type:   'request-lineage',
	schema: Joi.object({
		type:      Joi.string().valid('request-lineage').required(),
		id:        Joi.string().optional(),
		filetoken: Joi.string().required(),
		criterion: Joi.string().required()
	})
}
|
||
/**
 * Message of type `response-lineage`, carrying the lineage as a list of node ids.
 */
export interface LineageResponseMessage extends IdMessageBase {
	type:    'response-lineage',
	/** The lineage of the given criterion. With this being the representation of a set, there is no guarantee about order. */
	lineage: NodeId[]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import { withShell } from '../_helper/shell' | ||
import { PipelineExecutor } from '../../../src/core/pipeline-executor' | ||
import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/default-pipelines' | ||
import { requestFromInput } from '../../../src/r-bridge/retriever' | ||
import type { SingleSlicingCriterion } from '../../../src/slicing/criterion/parse' | ||
import { getLineage } from '../../../src/cli/repl/commands/lineage' | ||
import type { TestLabel } from '../_helper/label' | ||
import { decorateLabelContext, label } from '../_helper/label' | ||
import type { NodeId } from '../../../src/r-bridge/lang-4.x/ast/model/processing/node-id' | ||
import { assert } from 'chai' | ||
import { setEquals } from '../../../src/util/set' | ||
import { OperatorDatabase } from '../../../src/r-bridge/lang-4.x/ast/model/operators' | ||
|
||
describe('Test lineage', withShell(shell => {

	/**
	 * Registers a test case which runs the default dataflow pipeline on `request`,
	 * computes the lineage for `criterion`, and compares it (as a set, ignoring order) with `expected`.
	 */
	function assertLineage(title: string | TestLabel, request: string, criterion: SingleSlicingCriterion, expected: NodeId[]) {
		const effectiveName = decorateLabelContext(title, ['lineage'])

		return it(effectiveName, async() => {
			const executor = new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
				shell,
				request: requestFromInput(request)
			})
			const { normalize, dataflow } = await executor.allRemainingSteps()
			const actual = getLineage(criterion, normalize, dataflow)
			const message = `Expected ${JSON.stringify(expected)} but got ${JSON.stringify([...actual])}`
			assert.isTrue(setEquals(actual, new Set(expected)), message)
		})
	}

	// a chain of three assignments; the lineage of `a` in line 3 is expected to contain all nine ids
	const demoLabel = label('The demo lineage', [
		'name-normal', ...OperatorDatabase['<-'].capabilities, 'newlines'
	])
	assertLineage(demoLabel, `c <- x
b <- c
a <- b`, '3@a', [0, 1, 2, 3, 4, 5, 6, 7, 8])
}))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
429eef3
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"artificial" Benchmark Suite
| Benchmark | Current | Previous | Ratio |
|---|---|---|---|
| Retrieve AST from R code | 243.5575759090909 ms (107.86543297512463) | 238.1723235909091 ms (96.71402866840735) | 1.02 |
| Normalize R AST | 19.803893954545455 ms (34.45843540804731) | 19.4989345 ms (33.57276767882534) | 1.02 |
| Produce dataflow information | 38.19301140909091 ms (82.05917244485015) | 38.792485772727275 ms (84.2875165952748) | 0.98 |
| Total per-file | 803.4673899090909 ms (1529.7960877051385) | 799.2110945 ms (1544.5301502222158) | 1.01 |
| Static slicing | 1.2176619293341087 ms (1.1120394000143086) | 1.1726790800999163 ms (1.0086901141796194) | 1.04 |
| Reconstruct code | 0.25070812632072736 ms (0.19966603837215122) | 0.24335608198030453 ms (0.18573862888815007) | 1.03 |
| Total per-slice | 1.4848747471150194 ms (1.1618629904438125) | 1.4331761674751269 ms (1.0536924338527542) | 1.04 |
| failed to reconstruct/re-parse | 0 # | 0 # | 1 |
| times hit threshold | 0 # | 0 # | 1 |
| reduction (characters) | 0.7869724682442361 # | 0.786663222057468 # | 1.00 |
| reduction (normalized tokens) | 0.7640044233283717 # | 0.763664433957929 # | 1.00 |
| memory (df-graph) | 147.58589311079547 KiB (359.2574768951678) | 147.66770241477272 KiB (359.55136525995476) | 1.00 |
This comment was automatically generated by workflow using github-action-benchmark.
429eef3
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"social-science" Benchmark Suite
| Benchmark | Current | Previous | Ratio |
|---|---|---|---|
| Retrieve AST from R code | 250.14352703999998 ms (46.22166925640989) | 240.9999736 ms (45.52546037334287) | 1.04 |
| Normalize R AST | 23.11401898 ms (17.75707281573565) | 22.044827100000003 ms (17.4420619617221) | 1.05 |
| Produce dataflow information | 71.96789064000001 ms (86.92611070053505) | 68.41181266 ms (83.25046712198441) | 1.05 |
| Total per-file | 3589.1298162199996 ms (7924.056376688097) | 3603.0243807399997 ms (7958.676569737224) | 1.00 |
| Static slicing | 7.240201848248687 ms (20.33921754263181) | 7.403007833130669 ms (20.923205633042343) | 0.98 |
| Reconstruct code | 0.27129692298715036 ms (0.17211426023828647) | 0.24681178006363166 ms (0.15169934993997963) | 1.10 |
| Total per-slice | 7.520366205749907 ms (20.369225019376092) | 7.657637977886095 ms (20.95063234618626) | 0.98 |
| failed to reconstruct/re-parse | 0 # | 0 # | 1 |
| times hit threshold | 0 # | 0 # | 1 |
| reduction (characters) | 0.9181372100742089 # | 0.9214445180065712 # | 1.00 |
| reduction (normalized tokens) | 0.884931018000862 # | 0.88847659105633 # | 1.00 |
| memory (df-graph) | 142.5410546875 KiB (146.7038638918548) | 142.5463671875 KiB (146.6995040110581) | 1.00 |
This comment was automatically generated by workflow using github-action-benchmark.