Skip to content

Commit

Permalink
Support Library Loads and Installations as Potential Side Effects (#944)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce committed Aug 31, 2024
2 parents 9e75557 + 3f5ac40 commit 9a55656
Show file tree
Hide file tree
Showing 9 changed files with 46 additions and 39 deletions.
4 changes: 2 additions & 2 deletions src/cli/repl/server/connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ import { DataflowGraph } from '../../../dataflow/graph/graph'
import * as tmp from 'tmp'
import fs from 'fs'
import type { RParseRequests } from '../../../r-bridge/retriever'
import { autoSelectLibrary } from '../../../reconstruct/auto-select/auto-select-defaults'
import { makeMagicCommentHandler } from '../../../reconstruct/auto-select/magic-comments'
import type { LineageRequestMessage, LineageResponseMessage } from './messages/lineage'
import { requestLineageMessage } from './messages/lineage'
import { getLineage } from '../commands/lineage'
import { guard } from '../../../util/assert'
import { doNotAutoSelect } from '../../../reconstruct/auto-select/auto-select-defaults'

/**
* Each connection handles a single client, answering to its requests.
Expand Down Expand Up @@ -227,7 +227,7 @@ export class FlowRServerConnection {

fileInformation.pipeline.updateRequest({
criterion: request.criterion,
autoSelectIf: request.noMagicComments ? autoSelectLibrary : makeMagicCommentHandler(autoSelectLibrary)
autoSelectIf: request.noMagicComments ? doNotAutoSelect : makeMagicCommentHandler(doNotAutoSelect)
})

void fileInformation.pipeline.allRemainingSteps(true).then(results => {
Expand Down
4 changes: 2 additions & 2 deletions src/cli/slicer-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import type { SingleSlicingCriterion, SlicingCriteria } from '../slicing/criteri
import type { ReconstructionResult } from '../reconstruct/reconstruct'
import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id'
import { stats2string } from '../benchmark/stats/print'
import { autoSelectLibrary } from '../reconstruct/auto-select/auto-select-defaults'
import { makeMagicCommentHandler } from '../reconstruct/auto-select/magic-comments'
import { doNotAutoSelect } from '../reconstruct/auto-select/auto-select-defaults'

export interface SlicerCliOptions {
verbose: boolean
Expand Down Expand Up @@ -48,7 +48,7 @@ async function getSlice() {
options['input-is-text']
? { request: 'text', content: options.input }
: { request: 'file', content: options.input },
options['no-magic-comments'] ? autoSelectLibrary : makeMagicCommentHandler(autoSelectLibrary)
options['no-magic-comments'] ? doNotAutoSelect : makeMagicCommentHandler(doNotAutoSelect)
)

let mappedSlices: { criterion: SingleSlicingCriterion, id: NodeId }[] = []
Expand Down
8 changes: 7 additions & 1 deletion src/dataflow/environments/built-in.ts
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ registerBuiltInFunctions(true, ['[', '[['],
registerBuiltInFunctions(true, ['$', '@'], processAccess, { treatIndicesAsString: true } )
registerBuiltInFunctions(true, ['if', 'ifelse'], processIfThenElse, {} )
registerBuiltInFunctions(true, ['get'], processGet, {} )
registerBuiltInFunctions(false, ['library'], processLibrary, {} )
registerBuiltInFunctions(false, ['library', 'require'], processLibrary, {} )
registerBuiltInFunctions(true, ['<-', '='], processAssignment, { canBeReplacement: true } )
registerBuiltInFunctions(true, [':=', 'assign'], processAssignment, {} )
registerBuiltInFunctions(true, ['delayedAssign'], processAssignment, { quoteSource: true } )
Expand All @@ -203,6 +203,12 @@ registerBuiltInFunctions(true, ['repeat'],
registerBuiltInFunctions(true, ['while'], processWhileLoop, {} )
registerBuiltInFunctions(true, ['options'], defaultBuiltInProcessor, { hasUnknownSideEffects: true, forceArgs: 'all' as const } )
registerBuiltInFunctions(true, ['on.exit', 'sys.on.exit'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
/* library and require is handled above */
registerBuiltInFunctions(true, ['requireNamespace', 'loadNamespace', 'attachNamespace', 'asNamespace'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
/* downloader and installer functions (R, devtools, BiocManager) */
registerBuiltInFunctions(true, ['library.dynam', 'install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )
/* weird env attachments */
registerBuiltInFunctions(true, ['attach'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } )

/* they are all mapped to `<-` but we separate super assignments */
registerReplacementFunctions({ makeMaybe: true }, ['<-', '<<-'], '[', '[[', '$', '@', 'names', 'dimnames', 'attributes', 'attr', 'class', 'levels', 'rownames', 'colnames')
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@ import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type'
import { wrapArgumentsUnnamed } from '../argument/make-argument'


/* we currently do not mark this as an unknown side effect, as we can enable/disable this with a toggle */
export function processLibrary<OtherInfo>(
name: RSymbol<OtherInfo & ParentInformation>,
args: readonly RFunctionArgument<OtherInfo & ParentInformation>[],
rootId: NodeId,
data: DataflowProcessorInformation<OtherInfo & ParentInformation>
): DataflowInformation {
/* we do not really know what loading the library does and what side effects it causes, hence we mark it as an unknown side effect */
if(args.length !== 1) {
dataflowLogger.warn(`Currently only one-arg library-likes are allows (for ${name.content}), skipping`)
return processKnownFunctionCall({ name, args, rootId, data }).information
return processKnownFunctionCall({ name, args, rootId, data, hasUnknownSideEffect: true }).information
}
const nameToLoad = unpackArgument(args[0])
if(nameToLoad === undefined || nameToLoad.type !== RType.Symbol) {
dataflowLogger.warn('No library name provided, skipping')
return processKnownFunctionCall({ name, args, rootId, data }).information
return processKnownFunctionCall({ name, args, rootId, data, hasUnknownSideEffect: true }).information
}

// treat as a function call but convert the first argument to a string
Expand All @@ -40,6 +40,8 @@ export function processLibrary<OtherInfo>(
str: nameToLoad.content
}
}

return processKnownFunctionCall({ name, args: wrapArgumentsUnnamed([newArg], data.completeAst.idMap), rootId, data }).information
return processKnownFunctionCall({
name, args: wrapArgumentsUnnamed([newArg], data.completeAst.idMap), rootId, data,
hasUnknownSideEffect: true
}).information
}
26 changes: 16 additions & 10 deletions src/dataflow/internal/process/functions/call/known-call-handling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,18 @@ import { dataflowLogger } from '../../../../logger'
import { VertexType } from '../../../../graph/vertex'

export interface ProcessKnownFunctionCallInput<OtherInfo> extends ForceArguments {
readonly name: RSymbol<OtherInfo & ParentInformation>
readonly args: readonly (RNode<OtherInfo & ParentInformation> | RFunctionArgument<OtherInfo & ParentInformation>)[]
readonly rootId: NodeId
readonly data: DataflowProcessorInformation<OtherInfo & ParentInformation>
/* should arguments be processed from right to left? This does not affect the order recorded in the call but of the environments */
readonly reverseOrder?: boolean
readonly name: RSymbol<OtherInfo & ParentInformation>
readonly args: readonly (RNode<OtherInfo & ParentInformation> | RFunctionArgument<OtherInfo & ParentInformation>)[]
readonly rootId: NodeId
readonly data: DataflowProcessorInformation<OtherInfo & ParentInformation>
/** should arguments be processed from right to left? This does not affect the order recorded in the call but of the environments */
readonly reverseOrder?: boolean
/** which arguments are to be marked as {@link EdgeType#NonStandardEvaluation|non-standard-evaluation}? */
readonly markAsNSE?: readonly number[]
/* allows passing a data processor in-between each argument */
readonly patchData?: (data: DataflowProcessorInformation<OtherInfo & ParentInformation>, arg: number) => DataflowProcessorInformation<OtherInfo & ParentInformation>
readonly markAsNSE?: readonly number[]
/** allows passing a data processor in-between each argument */
readonly patchData?: (data: DataflowProcessorInformation<OtherInfo & ParentInformation>, arg: number) => DataflowProcessorInformation<OtherInfo & ParentInformation>
/** Does the call have a side effect that we do not know a lot about which may have further consequences? */
readonly hasUnknownSideEffect?: boolean
}

export interface ProcessKnownFunctionCallResult {
Expand Down Expand Up @@ -56,7 +58,7 @@ export function markNonStandardEvaluationEdges(
}

export function processKnownFunctionCall<OtherInfo>(
{ name,args, rootId,data, reverseOrder = false, markAsNSE = undefined, forceArgs, patchData = d => d }: ProcessKnownFunctionCallInput<OtherInfo>
{ name,args, rootId,data, reverseOrder = false, markAsNSE = undefined, forceArgs, patchData = d => d, hasUnknownSideEffect }: ProcessKnownFunctionCallInput<OtherInfo>
): ProcessKnownFunctionCallResult {
const functionName = processDataflowFor(name, data)

Expand Down Expand Up @@ -85,6 +87,10 @@ export function processKnownFunctionCall<OtherInfo>(
args: reverseOrder ? [...callArgs].reverse() : callArgs
})

if(hasUnknownSideEffect) {
finalGraph.markIdForUnknownSideEffects(rootId)
}

const inIds = remainingReadInArgs
const fnRef = { nodeId: rootId, name: functionCallName, controlDependencies: data.controlDependencies, call: true as const }
inIds.push(fnRef)
Expand Down
16 changes: 1 addition & 15 deletions src/reconstruct/auto-select/auto-select-defaults.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import type { NoInfo, RNode } from '../../r-bridge/lang-4.x/ast/model/model'
import type { RNode } from '../../r-bridge/lang-4.x/ast/model/model'
import type { ParentInformation, NormalizedAst } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'
import { RType } from '../../r-bridge/lang-4.x/ast/model/type'

/**
* The structure of the predicate that should be used to determine
Expand All @@ -19,16 +18,3 @@ export type AutoSelectPredicate = (node: RNode<ParentInformation>, fullAst: Norm
export function doNotAutoSelect(_node: RNode): boolean {
return false
}

const libraryFunctionCall = /^(library|require|((require|load|attach)Namespace))$/

/**
* A variant of the {@link AutoSelectPredicate} which does its best
* to select any kind of library import automatically.
*/
export function autoSelectLibrary<Info = NoInfo>(node: RNode<Info>): boolean {
if(node.type !== RType.FunctionCall || !node.named) {
return false
}
return libraryFunctionCall.test(node.functionName.content)
}
4 changes: 2 additions & 2 deletions src/reconstruct/reconstruct.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import type { StatefulFoldFunctions } from '../r-bridge/lang-4.x/ast/model/proce
import { foldAstStateful } from '../r-bridge/lang-4.x/ast/model/processing/stateful-fold'
import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id'
import type { AutoSelectPredicate } from './auto-select/auto-select-defaults'
import { autoSelectLibrary } from './auto-select/auto-select-defaults'
import { doNotAutoSelect } from './auto-select/auto-select-defaults'

type Selection = ReadonlySet<NodeId>
interface PrettyPrintLine {
Expand Down Expand Up @@ -491,7 +491,7 @@ function removeOuterExpressionListIfApplicable(result: PrettyPrintLine[], linesW
*
* @returns The number of lines for which `autoSelectIf` triggered, as well as the reconstructed code itself.
*/
export function reconstructToCode(ast: NormalizedAst, selection: Selection, autoSelectIf: AutoSelectPredicate = autoSelectLibrary): ReconstructionResult {
export function reconstructToCode(ast: NormalizedAst, selection: Selection, autoSelectIf: AutoSelectPredicate = doNotAutoSelect): ReconstructionResult {
if(reconstructLogger.settings.minLevel <= LogLevel.Trace) {
reconstructLogger.trace(`reconstruct ast with ids: ${JSON.stringify([...selection])}`)
}
Expand Down
4 changes: 2 additions & 2 deletions test/functionality/benchmark/slicer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ cat(d)`
tokensNoComments: { min: 13, max: 35, median: 19, mean: 22.333333333333332, std: 9.285592184789413, total: 67 },
normalizedTokens: { min: 8, max: 19, median: 11, mean: (8+11+19)/3, std: 4.642796092394707, total: 38 },
normalizedTokensNoComments: { min: 8, max: 19, median: 11, mean: (8+11+19)/3, std: 4.642796092394707, total: 38 },
dataflowNodes: { min: 3, max: 14, median: 6, mean: (3+6+14)/3, std: 4.642796092394707, total: 23 },
linesWithAutoSelected: { min: 1, max: 1, median: 1, mean: 1, std: 0, total: 3 } // always select one library statement
dataflowNodes: { min: 5, max: 16, median: 8, mean: (5+8+16)/3, std: 4.642796092394707, total: 29 },
linesWithAutoSelected: { min: 0, max: 0, median: 0, mean: 0, std: 0, total: 0 }
}, statInfo)

assert.deepStrictEqual(stats.perSliceMeasurements.sliceCriteriaSizes, {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -682,5 +682,12 @@ x`)
'x\non.exit(function() 3)', ['1@x'],
'x\non.exit(function() 3)'
)
assertSliced(label('Library Loads and Installations', [
'functions-with-global-side-effects', 'name-normal', 'strings', 'call-normal', 'unnamed-arguments', 'newlines', 'library-loading'
]), shell,
/* w should be included as it defined the package to be loaded by the library call */
'v\nlibrary(x)\nrequire(y)\nw <- "x"\nattachNamespace(w)\nloadNamespace("x")', ['1@v'],
'v\nlibrary(x)\nrequire(y)\nw <- "x"\nattachNamespace(w)\nloadNamespace("x")'
)
})
}))

2 comments on commit 9a55656

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: 9a55656 Previous: f7dd663 Ratio
Retrieve AST from R code 232.9759162727273 ms (97.24980725607145) 238.5188252272727 ms (100.22506468361182) 0.98
Normalize R AST 19.456760227272728 ms (34.10133909844928) 19.296409454545454 ms (33.092243108777645) 1.01
Produce dataflow information 36.96388472727273 ms (79.6515602036736) 36.9739589090909 ms (77.82386121660545) 1.00
Total per-file 797.8512177272728 ms (1419.6467316743372) 776.2117469545454 ms (1469.1651040608185) 1.03
Static slicing 2.1742035660405294 ms (1.311249252650579) 1.1872604452349385 ms (0.9948751822483859) 1.83
Reconstruct code 0.22627408163345789 ms (0.18394678082601015) 0.2468089451648185 ms (0.1946140644840204) 0.92
Total per-slice 2.416743962146326 ms (1.3763076282879023) 1.4516484087621992 ms (1.0470370397968392) 1.66
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7869724682442361 # 0.7869724682442361 # 1
reduction (normalized tokens) 0.7640044233283717 # 0.7640044233283717 # 1
memory (df-graph) 147.58589311079547 KiB (359.2574768951678) 147.58589311079547 KiB (359.2574768951678) 1

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: 9a55656 Previous: f7dd663 Ratio
Retrieve AST from R code 240.82628366 ms (45.82416232679357) 237.40032316 ms (43.13005941619528) 1.01
Normalize R AST 22.1822889 ms (16.845014085543838) 22.09362788 ms (16.761602459541297) 1.00
Produce dataflow information 68.8968469 ms (83.31038058237489) 68.97634276000001 ms (82.8815602292033) 1.00
Total per-file 3855.0505958400004 ms (7992.375386354442) 3713.15087572 ms (7958.055524113993) 1.04
Static slicing 8.041008380272569 ms (20.008916221797243) 7.663184753343301 ms (20.21369881919582) 1.05
Reconstruct code 0.22075087219097142 ms (0.13854816137233103) 0.23133881371574955 ms (0.1459510437889604) 0.95
Total per-slice 8.269617661835138 ms (20.039413848499013) 7.902366581545092 ms (20.237900525090257) 1.05
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.9054767484661775 # 0.9060203516109888 # 1.00
reduction (normalized tokens) 0.8742607285608068 # 0.8749621691839767 # 1.00
memory (df-graph) 142.38015625 KiB (146.62401913756796) 142.4557421875 KiB (146.62238285216054) 1.00

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.