Skip to content

Commit

Permalink
Functions Should Be Able to Force Their Arguments (#929)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce committed Aug 30, 2024
2 parents 9e99241 + 6ad0857 commit 7e27718
Show file tree
Hide file tree
Showing 26 changed files with 191 additions and 97 deletions.
64 changes: 34 additions & 30 deletions src/dataflow/environments/built-in.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-i
import { EdgeType } from '../graph/edge'
import { processLibrary } from '../internal/process/functions/call/built-in/built-in-library'
import { processSourceCall } from '../internal/process/functions/call/built-in/built-in-source'
import type { ForceArguments } from '../internal/process/functions/call/common'

export const BuiltIn = 'built-in'

Expand Down Expand Up @@ -59,9 +60,9 @@ function defaultBuiltInProcessor<OtherInfo>(
args: readonly RFunctionArgument<OtherInfo & ParentInformation>[],
rootId: NodeId,
data: DataflowProcessorInformation<OtherInfo & ParentInformation>,
config: { returnsNthArgument?: number | 'last', cfg?: ExitPointType, readAllArguments?: boolean, hasUnknownSideEffects?: boolean }
config: { returnsNthArgument?: number | 'last', cfg?: ExitPointType, readAllArguments?: boolean, hasUnknownSideEffects?: boolean } & ForceArguments
): DataflowInformation {
const { information: res, processedArguments } = processKnownFunctionCall({ name, args, rootId, data })
const { information: res, processedArguments } = processKnownFunctionCall({ name, args, rootId, data, forceArgs: config.forceArgs })
if(config.returnsNthArgument !== undefined) {
const arg = config.returnsNthArgument === 'last' ? processedArguments[args.length - 1] : processedArguments[config.returnsNthArgument]
if(arg !== undefined) {
Expand Down Expand Up @@ -167,35 +168,38 @@ registerSimpleFunctions(
'rep', 'seq', 'seq_len', 'seq_along', 'seq.int', 'gsub', 'which', 'class', 'dimnames', 'min', 'max',
'intersect', 'subset', 'match', 'sqrt', 'abs', 'round', 'floor', 'ceiling', 'signif', 'trunc', 'log', 'log10', 'log2', 'sum', 'mean',
'unique', 'paste', 'paste0', 'read.csv', 'stop', 'is.null', 'plot', 'numeric', 'as.character', 'as.integer', 'as.logical', 'as.numeric', 'as.matrix',
'apply', 'lapply', 'sapply', 'tapply', 'mapply', 'do.call', 'rbind', 'nrow', 'ncol', 'tryCatch', 'expression', 'factor',
'do.call', 'rbind', 'nrow', 'ncol', 'tryCatch', 'expression', 'factor',
'missing', 'as.data.frame', 'data.frame', 'na.omit', 'rownames', 'names', 'order', 'length', 'any', 'dim', 'matrix', 'cbind', 'nchar', 't'
)
registerBuiltInFunctions(true, ['print', '('], defaultBuiltInProcessor, { returnsNthArgument: 0 } )
registerBuiltInFunctions(true, ['load', 'load_all'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
registerBuiltInFunctions(false, ['cat', 'switch'], defaultBuiltInProcessor, {} ) /* returns null */
registerBuiltInFunctions(true, ['return'], defaultBuiltInProcessor, { returnsNthArgument: 0, cfg: ExitPointType.Return } )
registerBuiltInFunctions(true, ['break'], defaultBuiltInProcessor, { cfg: ExitPointType.Break } )
registerBuiltInFunctions(true, ['next'], defaultBuiltInProcessor, { cfg: ExitPointType.Next } )
registerBuiltInFunctions(true, ['{'], processExpressionList, {} )
registerBuiltInFunctions(true, ['source'], processSourceCall, { includeFunctionCall: true, forceFollow: false } )
registerBuiltInFunctions(true, ['[', '[['], processAccess, { treatIndicesAsString: false } )
registerBuiltInFunctions(true, ['$', '@'], processAccess, { treatIndicesAsString: true } )
registerBuiltInFunctions(true, ['if', 'ifelse'], processIfThenElse, {} )
registerBuiltInFunctions(true, ['get'], processGet, {} )
registerBuiltInFunctions(false, ['library'], processLibrary, {} )
registerBuiltInFunctions(true, ['<-', '='], processAssignment, { canBeReplacement: true } )
registerBuiltInFunctions(true, [':=', 'assign'], processAssignment, {} )
registerBuiltInFunctions(true, ['delayedAssign'], processAssignment, { quoteSource: true } )
registerBuiltInFunctions(true, ['<<-'], processAssignment, { superAssignment: true, canBeReplacement: true } )
registerBuiltInFunctions(true, ['->'], processAssignment, { swapSourceAndTarget: true, canBeReplacement: true } )
registerBuiltInFunctions(true, ['->>'], processAssignment, { superAssignment: true, swapSourceAndTarget: true, canBeReplacement: true } )
registerBuiltInFunctions(true, ['&&', '&'], processSpecialBinOp, { lazy: true, evalRhsWhen: true } )
registerBuiltInFunctions(true, ['||', '|'], processSpecialBinOp, { lazy: true, evalRhsWhen: false } )
registerBuiltInFunctions(true, ['|>', '%>%'], processPipe, {} )
registerBuiltInFunctions(true, ['function', '\\'], processFunctionDefinition, {} )
registerBuiltInFunctions(true, ['quote', 'substitute', 'bquote'], processQuote, { quoteArgumentsWithIndex: 0 } )
registerBuiltInFunctions(true, ['for'], processForLoop, {} )
registerBuiltInFunctions(true, ['repeat'], processRepeatLoop, {} )
registerBuiltInFunctions(true, ['while'], processWhileLoop, {} )
registerBuiltInFunctions(true, ['apply', 'lapply', 'sapply', 'tapply', 'mapply'], defaultBuiltInProcessor, { forceArgs: [false, true] } )
registerBuiltInFunctions(true, ['print'], defaultBuiltInProcessor, { returnsNthArgument: 0, forceArgs: 'all' as const } )
registerBuiltInFunctions(true, ['('], defaultBuiltInProcessor, { returnsNthArgument: 0 } )
registerBuiltInFunctions(true, ['load', 'load_all'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
registerBuiltInFunctions(false, ['cat'], defaultBuiltInProcessor, { forceArgs: 'all' as const } ) /* returns null */
registerBuiltInFunctions(false, ['switch'], defaultBuiltInProcessor, {} ) /* returns null */
registerBuiltInFunctions(true, ['return'], defaultBuiltInProcessor, { returnsNthArgument: 0, cfg: ExitPointType.Return } )
registerBuiltInFunctions(true, ['break'], defaultBuiltInProcessor, { cfg: ExitPointType.Break } )
registerBuiltInFunctions(true, ['next'], defaultBuiltInProcessor, { cfg: ExitPointType.Next } )
registerBuiltInFunctions(true, ['{'], processExpressionList, {} )
registerBuiltInFunctions(true, ['source'], processSourceCall, { includeFunctionCall: true, forceFollow: false } )
registerBuiltInFunctions(true, ['[', '[['], processAccess, { treatIndicesAsString: false } )
registerBuiltInFunctions(true, ['$', '@'], processAccess, { treatIndicesAsString: true } )
registerBuiltInFunctions(true, ['if', 'ifelse'], processIfThenElse, {} )
registerBuiltInFunctions(true, ['get'], processGet, {} )
registerBuiltInFunctions(false, ['library'], processLibrary, {} )
registerBuiltInFunctions(true, ['<-', '='], processAssignment, { canBeReplacement: true } )
registerBuiltInFunctions(true, [':=', 'assign'], processAssignment, {} )
registerBuiltInFunctions(true, ['delayedAssign'], processAssignment, { quoteSource: true } )
registerBuiltInFunctions(true, ['<<-'], processAssignment, { superAssignment: true, canBeReplacement: true } )
registerBuiltInFunctions(true, ['->'], processAssignment, { swapSourceAndTarget: true, canBeReplacement: true } )
registerBuiltInFunctions(true, ['->>'], processAssignment, { superAssignment: true, swapSourceAndTarget: true, canBeReplacement: true } )
registerBuiltInFunctions(true, ['&&', '&'], processSpecialBinOp, { lazy: true, evalRhsWhen: true } )
registerBuiltInFunctions(true, ['||', '|'], processSpecialBinOp, { lazy: true, evalRhsWhen: false } )
registerBuiltInFunctions(true, ['|>', '%>%'], processPipe, {} )
registerBuiltInFunctions(true, ['function', '\\'], processFunctionDefinition, {} )
registerBuiltInFunctions(true, ['quote', 'substitute', 'bquote'], processQuote, { quoteArgumentsWithIndex: 0 } )
registerBuiltInFunctions(true, ['for'], processForLoop, {} )
registerBuiltInFunctions(true, ['repeat'], processRepeatLoop, {} )
registerBuiltInFunctions(true, ['while'], processWhileLoop, {} )
/* they are all mapped to `<-` but we separate super assignments */
registerReplacementFunctions({ makeMaybe: true }, ['<-', '<<-'], '[', '[[', '$', '@', 'names', 'dimnames', 'attributes', 'attr', 'class', 'levels', 'rownames', 'colnames')
8 changes: 3 additions & 5 deletions src/dataflow/graph/graph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,7 @@ export class DataflowGraph<
private static DEFAULT_ENVIRONMENT: REnvironmentInformation | undefined = undefined
private _idMap: AstIdMap | undefined
/* Set of vertices which have sideEffects that we do not know anything about */
private _unknownSideEffects: Set<NodeId> = new Set<NodeId>()
// this should be linked separately
public readonly functionCache = new Map<NodeId, Set<DataflowGraphVertexInfo>>()
private _unknownSideEffects = new Set<NodeId>()

constructor(idMap: AstIdMap | undefined) {
DataflowGraph.DEFAULT_ENVIRONMENT ??= initializeCleanEnvironments()
Expand Down Expand Up @@ -232,11 +230,11 @@ export class DataflowGraph<
// keep a clone of the original environment
const environment = vertex.environment === undefined ? fallback : cloneEnvironmentInformation(vertex.environment)


this.vertexInformation.set(vertex.id, {
...vertex,
environment
} as unknown as Vertex)

if(asRoot) {
this.rootVertices.add(vertex.id)
}
Expand All @@ -253,7 +251,7 @@ export class DataflowGraph<
* Will insert a new edge into the graph,
* if the direction of the edge is of no importance (`same-read-read` or `same-def-def`), source
* and target will be sorted so that `from` has the lower, and `to` the higher id (default ordering).
* Please note, that this will never make edges to {@link BuiltIn} as they are not part of the graph.
* Please note that this will never make edges to {@link BuiltIn} as they are not part of the graph.
*/
public addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, edgeInfo: EdgeData<Edge>): this {
const { fromId, toId } = extractEdgeIds(from, to)
Expand Down
24 changes: 23 additions & 1 deletion src/dataflow/graph/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ export interface DataflowGraphVertexFunctionDefinition extends DataflowGraphVert
*/
subflow: DataflowFunctionFlowInformation
/**
* All exist points of the function definitions.
* All exit points of the function definitions.
* In other words: last expressions/return calls
*/
exitPoints: readonly NodeId[]
Expand All @@ -98,3 +98,25 @@ export interface DataflowGraphVertexFunctionDefinition extends DataflowGraphVert

export type DataflowGraphVertexArgument = DataflowGraphVertexUse | DataflowGraphVertexVariableDefinition | DataflowGraphVertexFunctionDefinition | DataflowGraphVertexFunctionCall | DataflowGraphValue
export type DataflowGraphVertexInfo = Required<DataflowGraphVertexArgument>


/**
 * Type guard that checks whether the given vertex is tagged as a value vertex.
 *
 * @param vertex - the dataflow graph vertex to inspect
 * @returns `true` exactly when `vertex.tag` equals `VertexType.Value`; the type of
 *          `vertex` is then narrowed to {@link DataflowGraphValue}
 */
export function isValueVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphValue {
	const { tag } = vertex
	return VertexType.Value === tag
}

/**
 * Type guard that checks whether the given vertex is tagged as a use vertex.
 *
 * @param vertex - the dataflow graph vertex to inspect
 * @returns `true` exactly when `vertex.tag` equals `VertexType.Use`; the type of
 *          `vertex` is then narrowed to {@link DataflowGraphVertexUse}
 */
export function isUseVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexUse {
	const { tag } = vertex
	return VertexType.Use === tag
}

/**
 * Type guard that checks whether the given vertex is tagged as a function call vertex.
 *
 * @param vertex - the dataflow graph vertex to inspect
 * @returns `true` exactly when `vertex.tag` equals `VertexType.FunctionCall`; the type of
 *          `vertex` is then narrowed to {@link DataflowGraphVertexFunctionCall}
 */
export function isFunctionCallVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexFunctionCall {
	const { tag } = vertex
	return VertexType.FunctionCall === tag
}

/**
 * Type guard that checks whether the given vertex is tagged as a variable definition vertex.
 *
 * @param vertex - the dataflow graph vertex to inspect
 * @returns `true` exactly when `vertex.tag` equals `VertexType.VariableDefinition`; the type of
 *          `vertex` is then narrowed to {@link DataflowGraphVertexVariableDefinition}
 */
export function isVariableDefinitionVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexVariableDefinition {
	const { tag } = vertex
	return VertexType.VariableDefinition === tag
}

/**
 * Type guard that checks whether the given vertex is tagged as a function definition vertex.
 *
 * @param vertex - the dataflow graph vertex to inspect
 * @returns `true` exactly when `vertex.tag` equals `VertexType.FunctionDefinition`; the type of
 *          `vertex` is then narrowed to {@link DataflowGraphVertexFunctionDefinition}
 */
export function isFunctionDefinitionVertex(vertex: DataflowGraphVertexBase): vertex is DataflowGraphVertexFunctionDefinition {
	const { tag } = vertex
	return VertexType.FunctionDefinition === tag
}

Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,25 @@ import { dataflowLogger } from '../../../../../logger'
import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type'
import { EdgeType } from '../../../../../graph/edge'
import { makeAllMaybe, makeReferenceMaybe } from '../../../../../environments/environment'
import type { ForceArguments } from '../common'

export function processAccess<OtherInfo>(
name: RSymbol<OtherInfo & ParentInformation>,
args: readonly RFunctionArgument<OtherInfo & ParentInformation>[],
rootId: NodeId,
data: DataflowProcessorInformation<OtherInfo & ParentInformation>,
config: { treatIndicesAsString: boolean }
config: { treatIndicesAsString: boolean } & ForceArguments
): DataflowInformation {
if(args.length < 2) {
dataflowLogger.warn(`Access ${name.content} has less than 2 arguments, skipping`)
return processKnownFunctionCall({ name, args, rootId, data }).information
return processKnownFunctionCall({ name, args, rootId, data, forceArgs: config.forceArgs }).information
}
const head = args[0]
guard(head !== EmptyArgument, () => `Access ${name.content} has no source, impossible!`)

let fnCall: ProcessKnownFunctionCallResult
if(!config.treatIndicesAsString) {
fnCall = processKnownFunctionCall({ name, args, rootId, data })
fnCall = processKnownFunctionCall({ name, args, rootId, data, forceArgs: config.forceArgs })
} else {
const newArgs = [...args]
// if the argument is a symbol, we convert it to a string for this perspective
Expand All @@ -51,7 +52,7 @@ export function processAccess<OtherInfo>(
}
}
}
fnCall = processKnownFunctionCall({ name, args: newArgs, rootId, data })
fnCall = processKnownFunctionCall({ name, args: newArgs, rootId, data, forceArgs: config.forceArgs })
}

const info = fnCall.information
Expand Down
Loading

2 comments on commit 7e27718

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: 7e27718 Previous: 4fb2496 Ratio
Retrieve AST from R code 232.57487077272728 ms (96.870408704623) 238.1723235909091 ms (96.71402866840735) 0.98
Normalize R AST 19.216854681818184 ms (33.217496686458794) 19.4989345 ms (33.57276767882534) 0.99
Produce dataflow information 41.034557772727275 ms (95.61992631612254) 38.792485772727275 ms (84.2875165952748) 1.06
Total per-file 772.4148853181819 ms (1458.590470260935) 799.2110945 ms (1544.5301502222158) 0.97
Static slicing 1.1675738998176683 ms (1.032666045356958) 1.1726790800999163 ms (1.0086901141796194) 1.00
Reconstruct code 0.26284474833421706 ms (0.23344240730722182) 0.24335608198030453 ms (0.18573862888815007) 1.08
Total per-slice 1.446781627423689 ms (1.0905044292238035) 1.4331761674751269 ms (1.0536924338527542) 1.01
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7869724682442361 # 0.786663222057468 # 1.00
reduction (normalized tokens) 0.7640044233283717 # 0.763664433957929 # 1.00
memory (df-graph) 147.58589311079547 KiB (359.2574768951678) 147.66770241477272 KiB (359.55136525995476) 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: 7e27718 Previous: 4fb2496 Ratio
Retrieve AST from R code 240.09078642 ms (43.532483190266305) 240.9999736 ms (45.52546037334287) 1.00
Normalize R AST 22.13527916 ms (16.750824739582328) 22.044827100000003 ms (17.4420619617221) 1.00
Produce dataflow information 69.29241864 ms (83.77573975398582) 68.41181266 ms (83.25046712198441) 1.01
Total per-file 3522.7436171199997 ms (7841.050850344587) 3603.0243807399997 ms (7958.676569737224) 0.98
Static slicing 7.148039307299094 ms (20.15227532687361) 7.403007833130669 ms (20.923205633042343) 0.97
Reconstruct code 0.23572366315077137 ms (0.15292722560777314) 0.24681178006363166 ms (0.15169934993997963) 0.96
Total per-slice 7.3913504800002325 ms (20.176199460708464) 7.657637977886095 ms (20.95063234618626) 0.97
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.9181372100742089 # 0.9214445180065712 # 1.00
reduction (normalized tokens) 0.884931018000862 # 0.88847659105633 # 1.00
memory (df-graph) 142.5410546875 KiB (146.7038638918548) 142.5463671875 KiB (146.6995040110581) 1.00

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.