diff --git a/src/cli/repl/commands/commands.ts b/src/cli/repl/commands/commands.ts
index bb8e8a91c7..eca9e0725b 100644
--- a/src/cli/repl/commands/commands.ts
+++ b/src/cli/repl/commands/commands.ts
@@ -13,6 +13,7 @@ import { italic , bold } from '../../../util/ansi'
 import { splitAtEscapeSensitive } from '../../../util/args'
 import { guard } from '../../../util/assert'
 import { scripts } from '../../common/scripts-info'
+import { getLineageCommand } from './lineage'
 
 function printHelpForScript(script: [string, ReplCommand], f: OutputFormatter): string {
 	const base = ` ${bold(padCmd(':' + script[0]), f)}${script[1].description}`
@@ -66,7 +67,8 @@ const _commands: Record = {
 	'dataflow': dataflowCommand,
 	'dataflow*': dataflowStarCommand,
 	'controlflow': controlflowCommand,
-	'controlflow*': controlflowStarCommand
+	'controlflow*': controlflowStarCommand,
+	'lineage': getLineageCommand
 }
 
 let commandsInitialized = false
diff --git a/src/cli/repl/commands/lineage.ts b/src/cli/repl/commands/lineage.ts
new file mode 100644
index 0000000000..d3b2821268
--- /dev/null
+++ b/src/cli/repl/commands/lineage.ts
@@ -0,0 +1,91 @@
+import type { ReplCommand } from './main'
+import { PipelineExecutor } from '../../../core/pipeline-executor'
+import { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default-pipelines'
+import type { RShell } from '../../../r-bridge/shell'
+import { requestFromInput } from '../../../r-bridge/retriever'
+import type { SingleSlicingCriterion } from '../../../slicing/criterion/parse'
+import { slicingCriterionToId } from '../../../slicing/criterion/parse'
+import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id'
+import type { OutgoingEdges } from '../../../dataflow/graph/graph'
+import type { DataflowGraphEdge } from '../../../dataflow/graph/edge'
+import { edgeIncludesType, EdgeType } from '../../../dataflow/graph/edge'
+import type { DataflowInformation } from '../../../dataflow/info'
+import type { NormalizedAst } from '../../../r-bridge/lang-4.x/ast/model/processing/decorate'
+import { guard } from '../../../util/assert'
+
+/** Splits `str` into the part before index `idx` and the part starting at `idx`. */
+function splitAt(str: string, idx: number) {
+	return [str.slice(0, idx), str.slice(idx)]
+}
+
+/** Runs the default dataflow pipeline on the (trimmed) R code given in `remainingLine`. */
+async function getDfg(shell: RShell, remainingLine: string) {
+	return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, {
+		shell,
+		request: requestFromInput(remainingLine.trim())
+	}).allRemainingSteps()
+}
+
+/** Tests the edge's types against the edge kinds a lineage follows (`DefinedBy`, `DefinedByOnCall`, `Returns`, `Reads`). */
+function filterRelevantEdges(edge: DataflowGraphEdge) {
+	return edgeIncludesType(EdgeType.DefinedBy | EdgeType.DefinedByOnCall | EdgeType.Returns | EdgeType.Reads, edge.types)
+}
+
+/** Appends every entry of `outgoingEdges` accepted by `filterRelevantEdges` to `queue`. */
+function pushRelevantEdges(queue: [NodeId, DataflowGraphEdge][], outgoingEdges: OutgoingEdges) {
+	queue.push(...[...outgoingEdges].filter(([_, edge]) => filterRelevantEdges(edge)))
+}
+
+/**
+ * Get the lineage of a node in the dataflow graph
+ *
+ * @param criterion - The criterion to get the lineage of
+ * @param ast       - The normalized AST
+ * @param dfg       - The dataflow graph
+ * @returns The lineage of the node represented as a set of node ids
+ */
+export function getLineage(criterion: SingleSlicingCriterion, { idMap } : NormalizedAst, dfg: DataflowInformation): Set<NodeId> {
+	const src = dfg.graph.get(slicingCriterionToId(criterion, idMap))
+	guard(src !== undefined, 'The ID pointed to by the criterion does not exist in the dataflow graph')
+	const [vertex, outgoingEdges] = src
+	const result = new Set<NodeId>([vertex.id])
+	const edgeQueue: [NodeId, DataflowGraphEdge][] = []
+	pushRelevantEdges(edgeQueue, outgoingEdges)
+
+	while(edgeQueue.length > 0) {
+		const [target] = edgeQueue.shift() as [NodeId, DataflowGraphEdge]
+		if(result.has(target)) {
+			continue
+		}
+
+		result.add(target)
+
+		const targetEdges = dfg.graph.outgoingEdges(target)
+		if(targetEdges !== undefined) {
+			pushRelevantEdges(edgeQueue, targetEdges)
+		}
+	}
+
+	return result
+}
+
+/**
+ * REPL command that prints the lineage of a criterion, one node id per line.
+ * Expects `remainingLine` to hold the slicing criterion, a space, and then the R code to analyze.
+ */
+export const getLineageCommand: ReplCommand = {
+	description: 'Get the lineage of an R object',
+	usageExample: ':lineage',
+	aliases: ['lin'],
+	script: false,
+	fn: async(output, shell, remainingLine) => {
+		const input = remainingLine.trim()
+		const separator = input.indexOf(' ')
+		// `indexOf` yields -1 if there is no space; `splitAt` would then cut the last character off the criterion and analyze it as the code
+		guard(separator !== -1, 'Expected a slicing criterion followed by the R code to analyze, separated by a space')
+		const [criterion, rest] = splitAt(input, separator)
+		const { dataflow: dfg, normalize: ast } = await getDfg(shell, rest)
+		const lineageIds = getLineage(criterion as SingleSlicingCriterion, ast, dfg)
+		output.stdout([...lineageIds].join('\n'))
+	}
+}
diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts
index 583d8c3d6a..4eb23e2c3c 100644
--- a/src/cli/repl/server/connection.ts
+++ b/src/cli/repl/server/connection.ts
@@ -35,6 +35,10 @@ import fs from 'fs'
 import type { RParseRequests } from '../../../r-bridge/retriever'
 import { autoSelectLibrary } from '../../../reconstruct/auto-select/auto-select-defaults'
 import { makeMagicCommentHandler } from '../../../reconstruct/auto-select/magic-comments'
+import type { LineageRequestMessage, LineageResponseMessage } from './messages/lineage'
+import { requestLineageMessage } from './messages/lineage'
+import { getLineage } from '../commands/lineage'
+import { guard } from '../../../util/assert'
 
 /**
  * Each connection handles a single client, answering to its requests.
@@ -92,6 +96,9 @@ export class FlowRServerConnection {
 			case 'request-repl-execution':
 				this.handleRepl(request.message as ExecuteRequestMessage)
 				break
+			case 'request-lineage':
+				this.handleLineageRequest(request.message as LineageRequestMessage)
+				break
 			default:
 				sendMessage(this.socket, {
 					id: request.message.id,
@@ -277,6 +284,55 @@ export class FlowRServerConnection {
 		})
 	}
 
+	/**
+	 * Answers a `request-lineage` message: validates it, looks up the analysis stored for its `filetoken`,
+	 * and responds with the lineage of the requested criterion. Failures are reported as non-fatal `error` messages.
+	 */
+	private handleLineageRequest(base: LineageRequestMessage) {
+		const requestResult = validateMessage(base, requestLineageMessage)
+
+		if(requestResult.type === 'error') {
+			answerForValidationError(this.socket, requestResult, base.id)
+			return
+		}
+
+		const request = requestResult.message
+		this.logger.info(`[${this.name}] Received lineage request for criterion ${request.criterion}`)
+
+		const fileInformation = this.fileMap.get(request.filetoken)
+		if(!fileInformation) {
+			sendMessage(this.socket, {
+				id: request.id,
+				type: 'error',
+				fatal: false,
+				reason: `The file token ${request.filetoken} has never been analyzed.`
+			})
+			return
+		}
+
+		let lineage: LineageResponseMessage['lineage']
+		try {
+			const { dataflow: dfg, normalize: ast } = fileInformation.pipeline.getResults(true)
+			guard(dfg !== undefined, `Dataflow graph must be present (request: ${request.filetoken})`)
+			guard(ast !== undefined, `AST must be present (request: ${request.filetoken})`)
+			lineage = [...getLineage(request.criterion, ast, dfg)]
+		} catch(e) {
+			// the guards and `getLineage` throw (e.g., if the criterion does not resolve to a node of the dataflow graph); answer the client instead of letting the exception escape
+			sendMessage(this.socket, {
+				id: request.id,
+				type: 'error',
+				fatal: false,
+				reason: `Could not compute the lineage for criterion ${request.criterion} (file token ${request.filetoken}): ${e instanceof Error ? e.message : String(e)}`
+			})
+			return
+		}
+
+		sendMessage<LineageResponseMessage>(this.socket, {
+			type: 'response-lineage',
+			id: request.id,
+			lineage
+		})
+	}
 }
 
 export function sanitizeAnalysisResults(results: Partial>): DeepPartial> {
diff --git a/src/cli/repl/server/messages/lineage.ts b/src/cli/repl/server/messages/lineage.ts
new file mode 100644
index 0000000000..55271d0cf2
--- /dev/null
+++ b/src/cli/repl/server/messages/lineage.ts
@@ -0,0 +1,31 @@
+import type { SingleSlicingCriterion } from '../../../../slicing/criterion/parse'
+import type { IdMessageBase, MessageDefinition } from './messages'
+import type { NodeId } from '../../../../r-bridge/lang-4.x/ast/model/processing/node-id'
+import Joi from 'joi'
+
+/** Requests the lineage of a criterion within a file identified by its `filetoken`. */
+export interface LineageRequestMessage extends IdMessageBase {
+	type: 'request-lineage',
+	/** The {@link FileAnalysisRequestMessage#filetoken} of the file/data */
+	filetoken: string,
+	/** The criterion to start the lineage from */
+	criterion: SingleSlicingCriterion,
+}
+
+/** Message type and Joi schema used to validate an incoming {@link LineageRequestMessage}. */
+export const requestLineageMessage: MessageDefinition<LineageRequestMessage> = {
+	type: 'request-lineage',
+	schema: Joi.object({
+		type: Joi.string().valid('request-lineage').required(),
+		id: Joi.string().optional(),
+		filetoken: Joi.string().required(),
+		criterion: Joi.string().required()
+	})
+}
+
+/** Answer to a {@link LineageRequestMessage}. */
+export interface LineageResponseMessage extends IdMessageBase {
+	type: 'response-lineage',
+	/** The lineage of the given criterion. With this being the representation of a set, there is no guarantee about order. */
+	lineage: NodeId[]
+}
diff --git a/src/cli/repl/server/messages/messages.ts b/src/cli/repl/server/messages/messages.ts
index 3d3c05d549..9853554ed9 100644
--- a/src/cli/repl/server/messages/messages.ts
+++ b/src/cli/repl/server/messages/messages.ts
@@ -9,6 +9,7 @@ import type { FileAnalysisRequestMessage, FileAnalysisResponseMessageJson } from
 import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, ExecuteRequestMessage } from './repl'
 import type { SliceRequestMessage, SliceResponseMessage } from './slice'
 import type { FlowrErrorMessage } from './error'
+import type { LineageRequestMessage, LineageResponseMessage } from './lineage'
 
 /**
  * If you send a message it must *not* contain a newline but the message must be terminated by a newline.
@@ -51,4 +52,6 @@ export type FlowrMessage = FlowrHelloResponseMessage | ExecuteEndMessage | SliceRequestMessage | SliceResponseMessage + | LineageRequestMessage + | LineageResponseMessage | FlowrErrorMessage diff --git a/test/functionality/_helper/label.ts b/test/functionality/_helper/label.ts index b55a2f9d6f..a4ecda0ddb 100644 --- a/test/functionality/_helper/label.ts +++ b/test/functionality/_helper/label.ts @@ -20,7 +20,7 @@ const uniqueTestId = (() => { })() -const TestLabelContexts = ['parse', 'desugar', 'dataflow', 'other', 'slice', 'output'] as const +const TestLabelContexts = ['parse', 'desugar', 'dataflow', 'other', 'slice', 'output', 'lineage'] as const export type TestLabelContext = typeof TestLabelContexts[number] export interface TestLabel extends MergeableRecord { diff --git a/test/functionality/lineage/lineage.spec.ts b/test/functionality/lineage/lineage.spec.ts new file mode 100644 index 0000000000..02e3e6af98 --- /dev/null +++ b/test/functionality/lineage/lineage.spec.ts @@ -0,0 +1,34 @@ +import { withShell } from '../_helper/shell' +import { PipelineExecutor } from '../../../src/core/pipeline-executor' +import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/default-pipelines' +import { requestFromInput } from '../../../src/r-bridge/retriever' +import type { SingleSlicingCriterion } from '../../../src/slicing/criterion/parse' +import { getLineage } from '../../../src/cli/repl/commands/lineage' +import type { TestLabel } from '../_helper/label' +import { decorateLabelContext, label } from '../_helper/label' +import type { NodeId } from '../../../src/r-bridge/lang-4.x/ast/model/processing/node-id' +import { assert } from 'chai' +import { setEquals } from '../../../src/util/set' +import { OperatorDatabase } from '../../../src/r-bridge/lang-4.x/ast/model/operators' + +describe('Test lineage', withShell(shell => { + /** Registers an `it` test that runs DEFAULT_DATAFLOW_PIPELINE on the R code `request` and asserts that `getLineage` for `criterion` yields exactly the ids in `expected` (compared as sets, so their order does not matter). */ + function assertLineage(title: string | TestLabel, request: string, criterion: SingleSlicingCriterion, expected: 
NodeId[]) { + const effectiveName = decorateLabelContext(title, ['lineage']) + + return it(effectiveName, async() => { + const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput(request) + }).allRemainingSteps() + const lineageIds = getLineage(criterion, result.normalize, result.dataflow) + assert.isTrue(setEquals(lineageIds, new Set(expected)), `Expected ${JSON.stringify(expected)} but got ${JSON.stringify([...lineageIds])}`) + }) + } + + assertLineage(label('The demo lineage', [ + 'name-normal', ...OperatorDatabase['<-'].capabilities, 'newlines' + ]), `c <- x +b <- c +a <- b`, '3@a', [0, 1, 2, 3, 4, 5, 6, 7, 8]) +})) diff --git a/test/functionality/slicing/static-program-slices/calls-tests.ts b/test/functionality/slicing/static-program-slices/calls-tests.ts index c04cca5575..38afb9058d 100644 --- a/test/functionality/slicing/static-program-slices/calls-tests.ts +++ b/test/functionality/slicing/static-program-slices/calls-tests.ts @@ -506,8 +506,11 @@ print(x)`, ['4@x'], 'x <- 3\nx'/*, { expectedOutput: '[1] 2' }*/) \`<-\` <- \`*\` x <- 3 print(y = x)`, ['4@y'], 'y=x') - assertSliced(label('redefine in local scope', []), - shell, `f <- function() { + assertSliced(label('redefine in local scope', [ + 'newlines', ...OperatorDatabase['<-'].capabilities, ...OperatorDatabase['*'].capabilities, + 'numbers', 'name-escaped', 'call-normal', 'function-definitions', 'redefinition-of-built-in-functions-primitives' + ]), + shell, `f <- function() { x <- 2 \`<-\` <- \`*\` x <- 3 @@ -565,7 +568,7 @@ foo(.x = f(3))`) assertSliced(label('nested definition in unknown foo', capabilities), shell, 'x <- function() { 3 }\nfoo(.x = function(y) { c(X = x()) })', ['2@foo'], 'x <- function() { 3 }\nfoo(.x = function(y) { c(X = x()) })') - assertSliced(label('nested definition in unknown foo with reference', []), shell, + assertSliced(label('nested definition in unknown foo with reference', capabilities), shell, 'x <- function() { 3 }\ng = 
function(y) { c(X = x()) }\nfoo(.x = g)', ['3@foo'], 'x <- function() { 3 }\ng = function(y) { c(X = x()) }\nfoo(.x = g)') }) diff --git a/wiki/Interface.md b/wiki/Interface.md index f52476aa63..47319f5441 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -12,6 +12,7 @@ Although far from being as detailed as the in-depth explanation of [_flowR_](htt - [The Slice Request](#the-slice-request) - [Magic Comments](#magic-comments) - [The REPL Request](#the-repl-request) + - [The Lineage Request](#the-lineage-request) - [💻 Using the REPL](#-using-the-repl) - [Example: Retrieving the Dataflow Graph](#example-retrieving-the-dataflow-graph) - [Interfacing with the File System](#interfacing-with-the-file-system) @@ -60,7 +61,7 @@ See the implementation of the hello message for more information regarding the c
Example Message -*Note:* even though we pretty-print these messages, they are sent as a single line, ending with a newline. +_Note:_ even though we pretty-print these messages, they are sent as a single line, ending with a newline. ```json { @@ -102,7 +103,7 @@ sequenceDiagram
The request allows the server to analyze a file and prepare it for slicing. -The message can contain a `filetoken`, which is used to identify the file in later slice requests (if you do not add one, the request will not be stored and therefore, it is not available for subsequent slicing). +The message can contain a `filetoken`, which is used to identify the file in later slice or lineage requests (if you do not add one, the request will not be stored and therefore, it is not available for subsequent requests). > [!IMPORTANT] > If you want to send and process a lot of analysis requests, but do not want to slice them, please do not pass the `filetoken` field. This will save the server a lot of memory allocation. @@ -114,7 +115,7 @@ See the implementation of the request-file-analysis message for more information
Example Request -*Note:* even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. +_Note:_ even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. ```json { @@ -130,7 +131,7 @@ See the implementation of the request-file-analysis message for more information
Example Response (Long) -*Note:* even though we pretty-print these responses, they are sent as a single line, ending with a newline. +_Note:_ even though we pretty-print these responses, they are sent as a single line, ending with a newline. The `results` field of the response effectively contains three keys of importance: @@ -2256,7 +2257,7 @@ It contains a human-readable description *why* the analysis failed (see the erro
Example Error Message -*Note:* even though we pretty-print these messages, they are sent as a single line, ending with a newline. +_Note:_ even though we pretty-print these messages, they are sent as a single line, ending with a newline. ```json { @@ -2277,7 +2278,7 @@ For this, the analysis request may add `cfg: true` to its list of options.
Example Request -*Note:* even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. +_Note:_ even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. ```json { @@ -2294,7 +2295,7 @@ For this, the analysis request may add `cfg: true` to its list of options.
Example Response (Shortened) -*Note:* even though we pretty-print these messages, they are sent as a single line, ending with a newline. +_Note:_ even though we pretty-print these messages, they are sent as a single line, ending with a newline. The response is basically the same as the response sent without the `cfg` flag. The following only shows important additions. If you are interested in a visual representation of the control flow graph, see the [mermaid visualization](https://mermaid.live/edit#base64:eyJjb2RlIjoiZmxvd2NoYXJ0IFREXG4gICAgbjBbXCJgUlN5bWJvbCAoMClcbid4J2BcIl1cbiAgICBuMVtcImBSTnVtYmVyICgxKVxuJzEnYFwiXVxuICAgIG4yW1wiYFJCaW5hcnlPcCAoMilcbid4IDwtIDEnYFwiXVxuICAgIG4zW1wiYFJTeW1ib2wgKDMpXG4neCdgXCJdXG4gICAgbjRbXCJgUk51bWJlciAoNClcbicxJ2BcIl1cbiAgICBuNVtcImBSQmluYXJ5T3AgKDUpXG4neCArIDEnYFwiXVxuICAgIG4xIC0uLT58XCJGRFwifCBuMFxuICAgIG4wIC0uLT58XCJGRFwifCBuMlxuICAgIG41IC0uLT58XCJGRFwifCBuMVxuICAgIG40IC0uLT58XCJGRFwifCBuM1xuICAgIG4zIC0uLT58XCJGRFwifCBuNVxuIiwibWVybWFpZCI6e30sInVwZGF0ZUVkaXRvciI6ZmFsc2UsImF1dG9TeW5jIjp0cnVlLCJ1cGRhdGVEaWFncmFtIjpmYWxzZX0=) (although it is really simple). @@ -2486,7 +2487,7 @@ The default response is formatted as JSON. However, by specifying `format: "n-qu
Example Request -*Note:* even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. +_Note:_ even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. ```json { @@ -2505,7 +2506,7 @@ The default response is formatted as JSON. However, by specifying `format: "n-qu
Example Response (Long) -*Note:* even though we pretty-print these messages, they are sent as a single line, ending with a newline. +_Note:_ even though we pretty-print these messages, they are sent as a single line, ending with a newline. Please note, that the base message format is still JSON. Only the individual results get converted. While the context is derived from the `filename`, we currently offer no way to customize other configurations (please open a [new issue](https://github.com/flowr-analysis/flowr/issues/new/choose) if you require this). @@ -2604,7 +2605,7 @@ Additionally, you may pass `"noMagicComments": true` to disable the automatic se
Example Request -*Note:* even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. +_Note:_ even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. This request is the logical succession of the file analysis example above which uses the `filetoken`: `"x"`. @@ -2629,7 +2630,7 @@ x + 1
Example Response -*Note:* even though we pretty-print these responses, they are sent as a single line, ending with a newline. +_Note:_ even though we pretty-print these responses, they are sent as a single line, ending with a newline. The `results` field of the response contains two keys of importance: @@ -2730,7 +2731,7 @@ The semantics of the error message are similar to that of the other messages.
Example Request -*Note:* even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. +_Note:_ even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. For this request to work, you have to start the server with the `--r-session-access` flag. ```json @@ -2746,7 +2747,7 @@ For this request to work, you have to start the server with the `--r-session-acc
Example Response -*Note:* even though we pretty-print these responses, they are sent as a single line, ending with a newline. +_Note:_ even though we pretty-print these responses, they are sent as a single line, ending with a newline. Prompting with `1+1` only produces one `response-repl-execution` message: @@ -2770,6 +2771,69 @@ The `stream` field (either `stdout` or `stderr`) informs you of the output's ori
+### The Lineage Request + +
+Sequence Diagram + +```mermaid +sequenceDiagram + autonumber + participant Client + participant Server + + Client->>+Server: request-lineage + + alt + Server-->>Client: response-lineage + else + Server-->>Client: error + end + deactivate Server +``` + +
+ +In order to retrieve the lineage of an object, you have to send a file analysis request first. The `filetoken` you assign is of use here as you can re-use it to repeatedly retrieve the lineage of the same file. +Besides that, you will need to add a [criterion](https://github.com/flowr-analysis/flowr/wiki/Terminology#slicing-criterion) that specifies the object whose lineage you're interested in. + +
+Example Request + +_Note:_ even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. + +This request is the logical succession of the file analysis example above which uses the `filetoken`: `"x"`. + +```json +{ + "type": "request-lineage", + "id": "2", + "filetoken": "x", + "criterion": "2@x" +} +``` + +
+ +
+Example Response + +_Note:_ even though we pretty-print these responses, they are sent as a single line, ending with a newline. + +The response contains the lineage of the desired object in the form of an array of IDs (as the representation of a set). + +```json +{ + "type": "response-lineage", + "id": "2", + "lineage": [3,0,1,2] +} +``` + +If an error occurs, the server sets the response's `type` to `"error"` and provides a message in the `reason` field. + +
+ ## 💻 Using the REPL > [!NOTE]