Skip to content

Commit

Permalink
Add additional ":lineage" command and message (#915)
Browse files Browse the repository at this point in the history
* feat: first attempt at the new getLineage command

* feat: walk additional edges for determining the lineage

* feat: store lineage IDs in a set

* feat: prevent infinite recursion when determining the lineage IDs

* feat-fix: fixed lineage network request

* feat-fix: get the dfg and ast from the pipeline in the correct way

* doc: added lineage request doc

* feat-fix: add filetoken to error messages

Co-authored-by: Florian Sihler <florian.sihler@uni-ulm.de>

* feat-fix: updated comment for better clarity

* feat-fix: more guards, more better

* feat-fix: clearer function interface

* feat-fix: removed unused import

No I didn't push with --no-verify

* feat-fix: removed console.log

Co-authored-by: Florian Sihler <florian.sihler@uni-ulm.de>

* feat-fix: criteria resolve now only requires an idmap

* test(lineage): basic test setup

* lint-fix: handle linter issues and empty test labels

* refactor(lineage): remove redundant promise

* doc(lineage): refine wiki entries

---------

Co-authored-by: Florian Sihler <florian.sihler@uni-ulm.de>
  • Loading branch information
LukasPietzschmann and EagleoutIce committed Aug 30, 2024
1 parent 7e27718 commit 429eef3
Show file tree
Hide file tree
Showing 9 changed files with 270 additions and 18 deletions.
4 changes: 3 additions & 1 deletion src/cli/repl/commands/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { italic , bold } from '../../../util/ansi'
import { splitAtEscapeSensitive } from '../../../util/args'
import { guard } from '../../../util/assert'
import { scripts } from '../../common/scripts-info'
import { getLineageCommand } from './lineage'

function printHelpForScript(script: [string, ReplCommand], f: OutputFormatter): string {
const base = ` ${bold(padCmd(':' + script[0]), f)}${script[1].description}`
Expand Down Expand Up @@ -66,7 +67,8 @@ const _commands: Record<string, ReplCommand> = {
'dataflow': dataflowCommand,
'dataflow*': dataflowStarCommand,
'controlflow': controlflowCommand,
'controlflow*': controlflowStarCommand
'controlflow*': controlflowStarCommand,
'lineage': getLineageCommand
}
let commandsInitialized = false

Expand Down
79 changes: 79 additions & 0 deletions src/cli/repl/commands/lineage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import type { ReplCommand } from './main'
import { PipelineExecutor } from '../../../core/pipeline-executor'
import { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default-pipelines'
import type { RShell } from '../../../r-bridge/shell'
import { requestFromInput } from '../../../r-bridge/retriever'
import type { SingleSlicingCriterion } from '../../../slicing/criterion/parse'
import { slicingCriterionToId } from '../../../slicing/criterion/parse'
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'
import type { OutgoingEdges } from '../../../dataflow/graph/graph'
import type { DataflowGraphEdge } from '../../../dataflow/graph/edge'
import { edgeIncludesType, EdgeType } from '../../../dataflow/graph/edge'
import type { DataflowInformation } from '../../../dataflow/info'
import type { NormalizedAst } from '../../../r-bridge/lang-4.x/ast/model/processing/decorate'
import { guard } from '../../../util/assert'

/**
 * Splits `str` into the part before `idx` and the part starting at `idx`.
 *
 * A negative `idx` (such as the `-1` produced by `indexOf` when nothing is found) is treated as
 * "there is no split point": the whole string is returned as the first part and the second part
 * is empty. Without this, `slice` would count the index from the end of the string and cut off
 * the last character of the first part.
 *
 * @param str - The string to split
 * @param idx - Index of the first character that belongs to the second part
 * @returns The pair `[before, fromIdx]`
 */
function splitAt(str: string, idx: number): [string, string] {
	if(idx < 0) {
		return [str, '']
	}
	return [str.slice(0, idx), str.slice(idx)]
}

/**
 * Runs the {@link DEFAULT_DATAFLOW_PIPELINE} on the given input and returns the pipeline results.
 *
 * @param shell         - The shell handed to the pipeline executor
 * @param remainingLine - The input to analyze; it is trimmed before being turned into a request
 * @returns The results of all pipeline steps
 */
async function getDfg(shell: RShell, remainingLine: string) {
	const request = requestFromInput(remainingLine.trim())
	const executor = new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, request })
	return await executor.allRemainingSteps()
}

/**
 * Decides whether an edge is followed while collecting a lineage.
 * The edge's types are checked (using {@link edgeIncludesType}) against the combination of
 * `DefinedBy`, `DefinedByOnCall`, `Returns`, and `Reads`.
 *
 * @param edge - The dataflow edge to check
 */
function filterRelevantEdges(edge: DataflowGraphEdge) {
	const followedTypes = EdgeType.DefinedBy | EdgeType.DefinedByOnCall | EdgeType.Returns | EdgeType.Reads
	return edgeIncludesType(followedTypes, edge.types)
}

/**
 * Appends every outgoing edge accepted by {@link filterRelevantEdges} to the end of `queue`,
 * keeping the iteration order of `outgoingEdges`.
 *
 * The edges are pushed one at a time instead of spreading a filtered copy into a single
 * `push(...)` call: spreading passes every element as a separate call argument, which is
 * limited by the engine's maximum argument count and needs a temporary array.
 *
 * @param queue         - The work queue to extend (modified in place)
 * @param outgoingEdges - The outgoing edges of a vertex, as pairs of target id and edge
 */
function pushRelevantEdges(queue: [NodeId, DataflowGraphEdge][], outgoingEdges: OutgoingEdges) {
	for(const [target, edge] of outgoingEdges) {
		if(filterRelevantEdges(edge)) {
			queue.push([target, edge])
		}
	}
}

/**
 * Get the lineage of a node in the dataflow graph.
 *
 * Starting at the vertex the criterion resolves to, this repeatedly follows the outgoing edges
 * accepted by {@link filterRelevantEdges} and collects the id of every vertex reached this way.
 * Ids that were already collected are not expanded again, so cycles in the graph terminate.
 *
 * @param criterion - The criterion to get the lineage of
 * @param ast       - The normalized AST (only its id map is used to resolve the criterion)
 * @param dfg       - The dataflow information containing the graph to walk
 * @returns The lineage of the node represented as a set of node ids (including the start node itself)
 *
 * The guard fails if the id the criterion resolves to is not part of the dataflow graph.
 */
export function getLineage(criterion: SingleSlicingCriterion, { idMap } : NormalizedAst, dfg: DataflowInformation): Set<NodeId> {
	const src = dfg.graph.get(slicingCriterionToId(criterion, idMap))
	guard(src !== undefined, 'The ID pointed to by the criterion does not exist in the dataflow graph')
	const [vertex, outgoingEdges] = src
	const result: Set<NodeId> = new Set([vertex.id])
	const edgeQueue: [NodeId, DataflowGraphEdge][] = []
	pushRelevantEdges(edgeQueue, outgoingEdges)

	// The array iterator re-checks the length on every step, so edges appended inside the loop
	// are visited as well, in the same first-in-first-out order as before. This avoids calling
	// `shift()`, which re-indexes the whole queue each time.
	for(const [target] of edgeQueue) {
		if(result.has(target)) {
			// already part of the lineage, expanding it again would only re-add known edges
			continue
		}

		result.add(target)

		const targetEdges = dfg.graph.outgoingEdges(target)
		if(targetEdges !== undefined) {
			pushRelevantEdges(edgeQueue, targetEdges)
		}
	}

	return result
}

/**
 * The `:lineage` REPL command (alias `lin`).
 * The text up to the first space of the remaining line is used as the slicing criterion, the
 * rest is analyzed with {@link getDfg}; the resulting lineage ids are printed one per line.
 */
export const getLineageCommand: ReplCommand = {
	description:  'Get the lineage of an R object',
	usageExample: ':lineage',
	aliases:      ['lin'],
	script:       false,
	fn:           async(output, shell, remainingLine) => {
		const separatorIdx = remainingLine.indexOf(' ')
		const [criterion, code] = splitAt(remainingLine, separatorIdx)
		const results = await getDfg(shell, code)
		const ids = getLineage(criterion as SingleSlicingCriterion, results.normalize, results.dataflow)
		output.stdout(Array.from(ids).join('\n'))
	}
}
39 changes: 39 additions & 0 deletions src/cli/repl/server/connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ import fs from 'fs'
import type { RParseRequests } from '../../../r-bridge/retriever'
import { autoSelectLibrary } from '../../../reconstruct/auto-select/auto-select-defaults'
import { makeMagicCommentHandler } from '../../../reconstruct/auto-select/magic-comments'
import type { LineageRequestMessage, LineageResponseMessage } from './messages/lineage'
import { requestLineageMessage } from './messages/lineage'
import { getLineage } from '../commands/lineage'
import { guard } from '../../../util/assert'

/**
* Each connection handles a single client, answering to its requests.
Expand Down Expand Up @@ -92,6 +96,9 @@ export class FlowRServerConnection {
case 'request-repl-execution':
this.handleRepl(request.message as ExecuteRequestMessage)
break
case 'request-lineage':
this.handleLineageRequest(request.message as LineageRequestMessage)
break
default:
sendMessage<FlowrErrorMessage>(this.socket, {
id: request.message.id,
Expand Down Expand Up @@ -277,6 +284,38 @@ export class FlowRServerConnection {
})
}

/**
 * Answers a `request-lineage` message.
 *
 * The message is validated against {@link requestLineageMessage}, the analysis results stored for
 * the given file token are looked up, and the lineage of the requested criterion is sent back as
 * a `response-lineage` message. Validation failures, unknown file tokens, and criteria for which
 * no lineage can be computed are answered with an error message instead.
 *
 * @param base - The (not yet validated) request as received from the client
 */
private handleLineageRequest(base: LineageRequestMessage) {
	const requestResult = validateMessage(base, requestLineageMessage)

	if(requestResult.type === 'error') {
		answerForValidationError(this.socket, requestResult, base.id)
		return
	}

	const request = requestResult.message
	this.logger.info(`[${this.name}] Received lineage request for criterion ${request.criterion}`)

	const fileInformation = this.fileMap.get(request.filetoken)
	if(!fileInformation) {
		sendMessage<FlowrErrorMessage>(this.socket, {
			id:     request.id,
			type:   'error',
			fatal:  false,
			reason: `The file token ${request.filetoken} has never been analyzed.`
		})
		return
	}

	const { dataflow: dfg, normalize: ast } = fileInformation.pipeline.getResults(true)
	guard(dfg !== undefined, `Dataflow graph must be present (request: ${request.filetoken})`)
	guard(ast !== undefined, `AST must be present (request: ${request.filetoken})`)

	// The criterion is supplied by the client and getLineage guards against criteria that do not
	// resolve to a vertex of the graph. Report such a failure to the client instead of letting
	// the exception escape this handler.
	let lineageIds: ReturnType<typeof getLineage>
	try {
		lineageIds = getLineage(request.criterion, ast, dfg)
	} catch(e: unknown) {
		const details = e instanceof Error ? e.message : String(e)
		sendMessage<FlowrErrorMessage>(this.socket, {
			id:     request.id,
			type:   'error',
			fatal:  false,
			reason: `Could not compute the lineage for criterion ${request.criterion}: ${details}`
		})
		return
	}

	sendMessage<LineageResponseMessage>(this.socket, {
		type:    'response-lineage',
		id:      request.id,
		lineage: [...lineageIds]
	})
}
}

export function sanitizeAnalysisResults(results: Partial<PipelineOutput<typeof DEFAULT_SLICING_PIPELINE>>): DeepPartial<PipelineOutput<typeof DEFAULT_SLICING_PIPELINE>> {
Expand Down
28 changes: 28 additions & 0 deletions src/cli/repl/server/messages/lineage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import type { SingleSlicingCriterion } from '../../../../slicing/criterion/parse'
import type { IdMessageBase, MessageDefinition } from './messages'
import type { NodeId } from '../../../../r-bridge/lang-4.x/ast/model/processing/node-id'
import Joi from 'joi'

/**
 * Message a client sends to request the lineage of a single slicing criterion
 * within a file identified by its file token.
 * The matching answer is a {@link LineageResponseMessage}.
 */
export interface LineageRequestMessage extends IdMessageBase {
/** Discriminator identifying this message as a lineage request */
type: 'request-lineage',
/** The {@link FileAnalysisRequestMessage#filetoken} of the file/data */
filetoken: string,
/** The criterion to start the lineage from */
criterion: SingleSlicingCriterion,
}

/**
 * Message definition for {@link LineageRequestMessage}: the message type together with the
 * Joi schema describing a well-formed request.
 *
 * The schema requires `type` to be exactly `'request-lineage'`, allows an optional string `id`,
 * and requires `filetoken` and `criterion` to be strings.
 * NOTE(review): the criterion is only checked to be a string here, not to be a well-formed
 * slicing criterion.
 */
export const requestLineageMessage: MessageDefinition<LineageRequestMessage> = {
type: 'request-lineage',
schema: Joi.object({
type: Joi.string().valid('request-lineage').required(),
id: Joi.string().optional(),
filetoken: Joi.string().required(),
criterion: Joi.string().required()
})
}

/**
 * Answer to a {@link LineageRequestMessage}, carrying the node ids that make up the lineage.
 */
export interface LineageResponseMessage extends IdMessageBase {
/** Discriminator identifying this message as a lineage response */
type: 'response-lineage',
/** The lineage of the given criterion. With this being the representation of a set, there is no guarantee about order. */
lineage: NodeId[]
}
3 changes: 3 additions & 0 deletions src/cli/repl/server/messages/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import type { FileAnalysisRequestMessage, FileAnalysisResponseMessageJson } from
import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, ExecuteRequestMessage } from './repl'
import type { SliceRequestMessage, SliceResponseMessage } from './slice'
import type { FlowrErrorMessage } from './error'
import type { LineageRequestMessage, LineageResponseMessage } from './lineage'

/**
* If you send a message it must *not* contain a newline but the message must be terminated by a newline.
Expand Down Expand Up @@ -51,4 +52,6 @@ export type FlowrMessage = FlowrHelloResponseMessage
| ExecuteEndMessage
| SliceRequestMessage
| SliceResponseMessage
| LineageRequestMessage
| LineageResponseMessage
| FlowrErrorMessage
2 changes: 1 addition & 1 deletion test/functionality/_helper/label.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ const uniqueTestId = (() => {
})()


const TestLabelContexts = ['parse', 'desugar', 'dataflow', 'other', 'slice', 'output'] as const
const TestLabelContexts = ['parse', 'desugar', 'dataflow', 'other', 'slice', 'output', 'lineage'] as const
export type TestLabelContext = typeof TestLabelContexts[number]

export interface TestLabel extends MergeableRecord {
Expand Down
34 changes: 34 additions & 0 deletions test/functionality/lineage/lineage.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { withShell } from '../_helper/shell'
import { PipelineExecutor } from '../../../src/core/pipeline-executor'
import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/default-pipelines'
import { requestFromInput } from '../../../src/r-bridge/retriever'
import type { SingleSlicingCriterion } from '../../../src/slicing/criterion/parse'
import { getLineage } from '../../../src/cli/repl/commands/lineage'
import type { TestLabel } from '../_helper/label'
import { decorateLabelContext, label } from '../_helper/label'
import type { NodeId } from '../../../src/r-bridge/lang-4.x/ast/model/processing/node-id'
import { assert } from 'chai'
import { setEquals } from '../../../src/util/set'
import { OperatorDatabase } from '../../../src/r-bridge/lang-4.x/ast/model/operators'

// Tests for `getLineage`: each case runs the default dataflow pipeline on a piece of R code
// and compares the computed lineage with an expected set of node ids.
describe('Test lineage', withShell(shell => {

/**
 * Registers a test case which analyzes `request` with the default dataflow pipeline,
 * computes the lineage for `criterion`, and asserts that it equals `expected` as a set
 * (the order of the ids is irrelevant).
 *
 * @param title     - Name or label of the test; it is decorated with the `lineage` context
 * @param request   - The R code to analyze
 * @param criterion - The slicing criterion to compute the lineage for
 * @param expected  - The node ids the lineage is expected to consist of
 */
function assertLineage(title: string | TestLabel, request: string, criterion: SingleSlicingCriterion, expected: NodeId[]) {
const effectiveName = decorateLabelContext(title, ['lineage'])

return it(effectiveName, async() => {
const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
shell,
request: requestFromInput(request)
}).allRemainingSteps()
const lineageIds = getLineage(criterion, result.normalize, result.dataflow)
// compare as sets, as the lineage does not guarantee any order
assert.isTrue(setEquals(lineageIds, new Set(expected)), `Expected ${JSON.stringify(expected)} but got ${JSON.stringify([...lineageIds])}`)
})
}

// A chain of three assignments: the lineage requested with '3@a' is expected to consist of the ids 0 through 8.
assertLineage(label('The demo lineage', [
'name-normal', ...OperatorDatabase['<-'].capabilities, 'newlines'
]), `c <- x
b <- c
a <- b`, '3@a', [0, 1, 2, 3, 4, 5, 6, 7, 8])
}))
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,11 @@ print(x)`, ['4@x'], 'x <- 3\nx'/*, { expectedOutput: '[1] 2' }*/)
\`<-\` <- \`*\`
x <- 3
print(y = x)`, ['4@y'], 'y=x')
assertSliced(label('redefine in local scope', []),
shell, `f <- function() {
assertSliced(label('redefine in local scope', [
'newlines', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['*'].capabilities,
'numbers', 'name-escaped', 'call-normal', 'function-definitions', 'redefinition-of-built-in-functions-primitives'
]),
shell, `f <- function() {
x <- 2
\`<-\` <- \`*\`
x <- 3
Expand Down Expand Up @@ -565,7 +568,7 @@ foo(.x = f(3))`)
assertSliced(label('nested definition in unknown foo', capabilities), shell,
'x <- function() { 3 }\nfoo(.x = function(y) { c(X = x()) })', ['2@foo'],
'x <- function() { 3 }\nfoo(.x = function(y) { c(X = x()) })')
assertSliced(label('nested definition in unknown foo with reference', []), shell,
assertSliced(label('nested definition in unknown foo with reference', capabilities), shell,
'x <- function() { 3 }\ng = function(y) { c(X = x()) }\nfoo(.x = g)', ['3@foo'],
'x <- function() { 3 }\ng = function(y) { c(X = x()) }\nfoo(.x = g)')
})
Expand Down
Loading

2 comments on commit 429eef3

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: 429eef3 Previous: 4fb2496 Ratio
Retrieve AST from R code 243.5575759090909 ms (107.86543297512463) 238.1723235909091 ms (96.71402866840735) 1.02
Normalize R AST 19.803893954545455 ms (34.45843540804731) 19.4989345 ms (33.57276767882534) 1.02
Produce dataflow information 38.19301140909091 ms (82.05917244485015) 38.792485772727275 ms (84.2875165952748) 0.98
Total per-file 803.4673899090909 ms (1529.7960877051385) 799.2110945 ms (1544.5301502222158) 1.01
Static slicing 1.2176619293341087 ms (1.1120394000143086) 1.1726790800999163 ms (1.0086901141796194) 1.04
Reconstruct code 0.25070812632072736 ms (0.19966603837215122) 0.24335608198030453 ms (0.18573862888815007) 1.03
Total per-slice 1.4848747471150194 ms (1.1618629904438125) 1.4331761674751269 ms (1.0536924338527542) 1.04
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7869724682442361 # 0.786663222057468 # 1.00
reduction (normalized tokens) 0.7640044233283717 # 0.763664433957929 # 1.00
memory (df-graph) 147.58589311079547 KiB (359.2574768951678) 147.66770241477272 KiB (359.55136525995476) 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: 429eef3 Previous: 4fb2496 Ratio
Retrieve AST from R code 250.14352703999998 ms (46.22166925640989) 240.9999736 ms (45.52546037334287) 1.04
Normalize R AST 23.11401898 ms (17.75707281573565) 22.044827100000003 ms (17.4420619617221) 1.05
Produce dataflow information 71.96789064000001 ms (86.92611070053505) 68.41181266 ms (83.25046712198441) 1.05
Total per-file 3589.1298162199996 ms (7924.056376688097) 3603.0243807399997 ms (7958.676569737224) 1.00
Static slicing 7.240201848248687 ms (20.33921754263181) 7.403007833130669 ms (20.923205633042343) 0.98
Reconstruct code 0.27129692298715036 ms (0.17211426023828647) 0.24681178006363166 ms (0.15169934993997963) 1.10
Total per-slice 7.520366205749907 ms (20.369225019376092) 7.657637977886095 ms (20.95063234618626) 0.98
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.9181372100742089 # 0.9214445180065712 # 1.00
reduction (normalized tokens) 0.884931018000862 # 0.88847659105633 # 1.00
memory (df-graph) 142.5410546875 KiB (146.7038638918548) 142.5463671875 KiB (146.6995040110581) 1.00

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.