From c79b5dbb986719803e2fbbd6a2ab26006f29e615 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 31 Aug 2024 11:09:53 +0200 Subject: [PATCH 1/4] feat(wip-select): deprecate and remove the old automatic selector --- src/cli/repl/server/connection.ts | 4 ++-- src/cli/slicer-app.ts | 4 ++-- .../auto-select/auto-select-defaults.ts | 16 +--------------- src/reconstruct/reconstruct.ts | 4 ++-- 4 files changed, 7 insertions(+), 21 deletions(-) diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 4eb23e2c3c..cda28db3f2 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -33,12 +33,12 @@ import { DataflowGraph } from '../../../dataflow/graph/graph' import * as tmp from 'tmp' import fs from 'fs' import type { RParseRequests } from '../../../r-bridge/retriever' -import { autoSelectLibrary } from '../../../reconstruct/auto-select/auto-select-defaults' import { makeMagicCommentHandler } from '../../../reconstruct/auto-select/magic-comments' import type { LineageRequestMessage, LineageResponseMessage } from './messages/lineage' import { requestLineageMessage } from './messages/lineage' import { getLineage } from '../commands/lineage' import { guard } from '../../../util/assert' +import { doNotAutoSelect } from '../../../reconstruct/auto-select/auto-select-defaults' /** * Each connection handles a single client, answering to its requests. @@ -227,7 +227,7 @@ export class FlowRServerConnection { fileInformation.pipeline.updateRequest({ criterion: request.criterion, - autoSelectIf: request.noMagicComments ? autoSelectLibrary : makeMagicCommentHandler(autoSelectLibrary) + autoSelectIf: request.noMagicComments ? doNotAutoSelect : makeMagicCommentHandler(doNotAutoSelect) }) void fileInformation.pipeline.allRemainingSteps(true).then(results => { diff --git a/src/cli/slicer-app.ts b/src/cli/slicer-app.ts index 99e9075066..3620c35747 100644 --- a/src/cli/slicer-app.ts +++ b/src/cli/slicer-app.ts @@ -11,8 +11,8 @@ import type { SingleSlicingCriterion, SlicingCriteria } from '../slicing/criteri import type { ReconstructionResult } from '../reconstruct/reconstruct' import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id' import { stats2string } from '../benchmark/stats/print' -import { autoSelectLibrary } from '../reconstruct/auto-select/auto-select-defaults' import { makeMagicCommentHandler } from '../reconstruct/auto-select/magic-comments' +import { doNotAutoSelect } from '../reconstruct/auto-select/auto-select-defaults' export interface SlicerCliOptions { verbose: boolean @@ -48,7 +48,7 @@ async function getSlice() { options['input-is-text'] ? { request: 'text', content: options.input } : { request: 'file', content: options.input }, - options['no-magic-comments'] ? autoSelectLibrary : makeMagicCommentHandler(autoSelectLibrary) + options['no-magic-comments'] ? doNotAutoSelect : makeMagicCommentHandler(doNotAutoSelect) ) let mappedSlices: { criterion: SingleSlicingCriterion, id: NodeId }[] = [] diff --git a/src/reconstruct/auto-select/auto-select-defaults.ts b/src/reconstruct/auto-select/auto-select-defaults.ts index d82afb4548..c703d8ddc9 100644 --- a/src/reconstruct/auto-select/auto-select-defaults.ts +++ b/src/reconstruct/auto-select/auto-select-defaults.ts @@ -1,6 +1,5 @@ -import type { NoInfo, RNode } from '../../r-bridge/lang-4.x/ast/model/model' +import type { RNode } from '../../r-bridge/lang-4.x/ast/model/model' import type { ParentInformation, NormalizedAst } from '../../r-bridge/lang-4.x/ast/model/processing/decorate' -import { RType } from '../../r-bridge/lang-4.x/ast/model/type' /** * The structure of the predicate that should be used to determine @@ -19,16 +18,3 @@ export type AutoSelectPredicate = (node: RNode, fullAst: Norm export function doNotAutoSelect(_node: RNode): boolean { return false } - -const libraryFunctionCall = /^(library|require|((require|load|attach)Namespace))$/ - -/** - * A variant of the {@link AutoSelectPredicate} which does its best - * to select any kind of library import automatically. - */ -export function autoSelectLibrary(node: RNode): boolean { - if(node.type !== RType.FunctionCall || !node.named) { - return false - } - return libraryFunctionCall.test(node.functionName.content) -} diff --git a/src/reconstruct/reconstruct.ts b/src/reconstruct/reconstruct.ts index aa5e70b2fd..4b95b50e30 100644 --- a/src/reconstruct/reconstruct.ts +++ b/src/reconstruct/reconstruct.ts @@ -31,7 +31,7 @@ import type { StatefulFoldFunctions } from '../r-bridge/lang-4.x/ast/model/proce import { foldAstStateful } from '../r-bridge/lang-4.x/ast/model/processing/stateful-fold' import type { NodeId } from '../r-bridge/lang-4.x/ast/model/processing/node-id' import type { AutoSelectPredicate } from './auto-select/auto-select-defaults' -import { autoSelectLibrary } from './auto-select/auto-select-defaults' +import { doNotAutoSelect } from './auto-select/auto-select-defaults' type Selection = ReadonlySet interface PrettyPrintLine { @@ -491,7 +491,7 @@ function removeOuterExpressionListIfApplicable(result: PrettyPrintLine[], linesW * * @returns The number of lines for which `autoSelectIf` triggered, as well as the reconstructed code itself. */ -export function reconstructToCode(ast: NormalizedAst, selection: Selection, autoSelectIf: AutoSelectPredicate = autoSelectLibrary): ReconstructionResult { +export function reconstructToCode(ast: NormalizedAst, selection: Selection, autoSelectIf: AutoSelectPredicate = doNotAutoSelect): ReconstructionResult { if(reconstructLogger.settings.minLevel <= LogLevel.Trace) { reconstructLogger.trace(`reconstruct ast with ids: ${JSON.stringify([...selection])}`) } From 05640957134554ad8994c04bc09985d9849a80f1 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 31 Aug 2024 11:42:49 +0200 Subject: [PATCH 2/4] feat: side-effects for common installer functions --- src/dataflow/environments/built-in.ts | 4 +++ .../call/built-in/built-in-library.ts | 12 +++++---- .../functions/call/known-call-handling.ts | 26 ++++++++++++------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/src/dataflow/environments/built-in.ts b/src/dataflow/environments/built-in.ts index 88bb188e95..e47607775e 100644 --- a/src/dataflow/environments/built-in.ts +++ b/src/dataflow/environments/built-in.ts @@ -203,6 +203,10 @@ registerBuiltInFunctions(true, ['repeat'], registerBuiltInFunctions(true, ['while'], processWhileLoop, {} ) registerBuiltInFunctions(true, ['options'], defaultBuiltInProcessor, { hasUnknownSideEffects: true, forceArgs: 'all' as const } ) registerBuiltInFunctions(true, ['on.exit', 'sys.on.exit'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) +/* library is handled above */ +registerBuiltInFunctions(true, ['require','requireNamespace', 'loadNamespace', 'attachNamespace'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) +/* downloader and installer functions (R, devtools, BiocManager) */ +registerBuiltInFunctions(true, ['install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) /* they are all mapped to `<-` but we separate super assignments */ registerReplacementFunctions({ makeMaybe: true }, ['<-', '<<-'], '[', '[[', '$', '@', 'names', 'dimnames', 'attributes', 'attr', 'class', 'levels', 'rownames', 'colnames') diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-library.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-library.ts index 2518da9e2f..560d66783b 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-library.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-library.ts @@ -12,21 +12,21 @@ import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type' import { wrapArgumentsUnnamed } from '../argument/make-argument' -/* we currently do not mark this as an unknown side effect, as we can enable/disable this with a toggle */ export function processLibrary( name: RSymbol, args: readonly RFunctionArgument[], rootId: NodeId, data: DataflowProcessorInformation ): DataflowInformation { + /* we do not really know what loading the library does and what side effects it causes, hence we mark it as an unknown side effect */ if(args.length !== 1) { dataflowLogger.warn(`Currently only one-arg library-likes are allows (for ${name.content}), skipping`) - return processKnownFunctionCall({ name, args, rootId, data }).information + return processKnownFunctionCall({ name, args, rootId, data, hasUnknownSideEffect: true }).information } const nameToLoad = unpackArgument(args[0]) if(nameToLoad === undefined || nameToLoad.type !== RType.Symbol) { dataflowLogger.warn('No library name provided, skipping') - return processKnownFunctionCall({ name, args, rootId, data }).information + return processKnownFunctionCall({ name, args, rootId, data, hasUnknownSideEffect: true }).information } // treat as a function call but convert the first argument to a string @@ -40,6 +40,8 @@ export function processLibrary( str: nameToLoad.content } } - - return processKnownFunctionCall({ name, args: wrapArgumentsUnnamed([newArg], data.completeAst.idMap), rootId, data }).information + return processKnownFunctionCall({ + name, args: wrapArgumentsUnnamed([newArg], data.completeAst.idMap), rootId, data, + hasUnknownSideEffect: true + }).information } diff --git a/src/dataflow/internal/process/functions/call/known-call-handling.ts b/src/dataflow/internal/process/functions/call/known-call-handling.ts index 9156597c37..7032c3b9a1 100644 --- a/src/dataflow/internal/process/functions/call/known-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/known-call-handling.ts @@ -16,16 +16,18 @@ import { dataflowLogger } from '../../../../logger' import { VertexType } from '../../../../graph/vertex' export interface ProcessKnownFunctionCallInput extends ForceArguments { - readonly name: RSymbol - readonly args: readonly (RNode | RFunctionArgument)[] - readonly rootId: NodeId - readonly data: DataflowProcessorInformation - /* should arguments be processed from right to left? This does not affect the order recorded in the call but of the environments */ - readonly reverseOrder?: boolean + readonly name: RSymbol + readonly args: readonly (RNode | RFunctionArgument)[] + readonly rootId: NodeId + readonly data: DataflowProcessorInformation + /** should arguments be processed from right to left? This does not affect the order recorded in the call but of the environments */ + readonly reverseOrder?: boolean /** which arguments are to be marked as {@link EdgeType#NonStandardEvaluation|non-standard-evaluation}? */ - readonly markAsNSE?: readonly number[] - /* allows passing a data processor in-between each argument */ - readonly patchData?: (data: DataflowProcessorInformation, arg: number) => DataflowProcessorInformation + readonly markAsNSE?: readonly number[] + /** allows passing a data processor in-between each argument */ + readonly patchData?: (data: DataflowProcessorInformation, arg: number) => DataflowProcessorInformation + /** Does the call have a side effect that we do not know a lot about which may have further consequences? */ + readonly hasUnknownSideEffect?: boolean } export interface ProcessKnownFunctionCallResult { @@ -56,7 +58,7 @@ export function markNonStandardEvaluationEdges( } export function processKnownFunctionCall( - { name,args, rootId,data, reverseOrder = false, markAsNSE = undefined, forceArgs, patchData = d => d }: ProcessKnownFunctionCallInput + { name,args, rootId,data, reverseOrder = false, markAsNSE = undefined, forceArgs, patchData = d => d, hasUnknownSideEffect }: ProcessKnownFunctionCallInput ): ProcessKnownFunctionCallResult { const functionName = processDataflowFor(name, data) @@ -85,6 +87,10 @@ export function processKnownFunctionCall( args: reverseOrder ? [...callArgs].reverse() : callArgs }) + if(hasUnknownSideEffect) { + finalGraph.markIdForUnknownSideEffects(rootId) + } + const inIds = remainingReadInArgs const fnRef = { nodeId: rootId, name: functionCallName, controlDependencies: data.controlDependencies, call: true as const } inIds.push(fnRef) From 7d12d63948d80e6c989ff2f762b34090a5cd9d5c Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 31 Aug 2024 11:48:30 +0200 Subject: [PATCH 3/4] test: adapt tests to reflect unknown side effects for libraries --- test/functionality/benchmark/slicer.spec.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/functionality/benchmark/slicer.spec.ts b/test/functionality/benchmark/slicer.spec.ts index 04defce4f0..dfc6a2c47b 100644 --- a/test/functionality/benchmark/slicer.spec.ts +++ b/test/functionality/benchmark/slicer.spec.ts @@ -135,8 +135,8 @@ cat(d)` tokensNoComments: { min: 13, max: 35, median: 19, mean: 22.333333333333332, std: 9.285592184789413, total: 67 }, normalizedTokens: { min: 8, max: 19, median: 11, mean: (8+11+19)/3, std: 4.642796092394707, total: 38 }, normalizedTokensNoComments: { min: 8, max: 19, median: 11, mean: (8+11+19)/3, std: 4.642796092394707, total: 38 }, - dataflowNodes: { min: 3, max: 14, median: 6, mean: (3+6+14)/3, std: 4.642796092394707, total: 23 }, - linesWithAutoSelected: { min: 1, max: 1, median: 1, mean: 1, std: 0, total: 3 } // always select one library statement + dataflowNodes: { min: 5, max: 16, median: 8, mean: (5+8+16)/3, std: 4.642796092394707, total: 29 }, + linesWithAutoSelected: { min: 0, max: 0, median: 0, mean: 0, std: 0, total: 0 } }, statInfo) assert.deepStrictEqual(stats.perSliceMeasurements.sliceCriteriaSizes, { From 3f5ac40b2f9f8fda14a7fd57ee63f23ba5deabd3 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 31 Aug 2024 12:01:05 +0200 Subject: [PATCH 4/4] test: have some tests for the library loading --- src/dataflow/environments/built-in.ts | 10 ++++++---- .../slicing/static-program-slices/calls-tests.ts | 7 +++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/dataflow/environments/built-in.ts b/src/dataflow/environments/built-in.ts index e47607775e..9a23e4a062 100644 --- a/src/dataflow/environments/built-in.ts +++ b/src/dataflow/environments/built-in.ts @@ -186,7 +186,7 @@ registerBuiltInFunctions(true, ['[', '[['], registerBuiltInFunctions(true, ['$', '@'], processAccess, { treatIndicesAsString: true } ) registerBuiltInFunctions(true, ['if', 'ifelse'], processIfThenElse, {} ) registerBuiltInFunctions(true, ['get'], processGet, {} ) -registerBuiltInFunctions(false, ['library'], processLibrary, {} ) +registerBuiltInFunctions(false, ['library', 'require'], processLibrary, {} ) registerBuiltInFunctions(true, ['<-', '='], processAssignment, { canBeReplacement: true } ) registerBuiltInFunctions(true, [':=', 'assign'], processAssignment, {} ) registerBuiltInFunctions(true, ['delayedAssign'], processAssignment, { quoteSource: true } ) @@ -203,10 +203,12 @@ registerBuiltInFunctions(true, ['repeat'], registerBuiltInFunctions(true, ['while'], processWhileLoop, {} ) registerBuiltInFunctions(true, ['options'], defaultBuiltInProcessor, { hasUnknownSideEffects: true, forceArgs: 'all' as const } ) registerBuiltInFunctions(true, ['on.exit', 'sys.on.exit'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) -/* library is handled above */ -registerBuiltInFunctions(true, ['require','requireNamespace', 'loadNamespace', 'attachNamespace'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) +/* library and require is handled above */ +registerBuiltInFunctions(true, ['requireNamespace', 'loadNamespace', 'attachNamespace', 'asNamespace'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) /* downloader and installer functions (R, devtools, BiocManager) */ -registerBuiltInFunctions(true, ['install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) +registerBuiltInFunctions(true, ['library.dynam', 'install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) +/* weird env attachments */ +registerBuiltInFunctions(true, ['attach'], defaultBuiltInProcessor, { hasUnknownSideEffects: true } ) /* they are all mapped to `<-` but we separate super assignments */ registerReplacementFunctions({ makeMaybe: true }, ['<-', '<<-'], '[', '[[', '$', '@', 'names', 'dimnames', 'attributes', 'attr', 'class', 'levels', 'rownames', 'colnames') diff --git a/test/functionality/slicing/static-program-slices/calls-tests.ts b/test/functionality/slicing/static-program-slices/calls-tests.ts index 66728ca632..22749c323f 100644 --- a/test/functionality/slicing/static-program-slices/calls-tests.ts +++ b/test/functionality/slicing/static-program-slices/calls-tests.ts @@ -682,5 +682,12 @@ x`) 'x\non.exit(function() 3)', ['1@x'], 'x\non.exit(function() 3)' ) + assertSliced(label('Library Loads and Installations', [ + 'functions-with-global-side-effects', 'name-normal', 'strings', 'call-normal', 'unnamed-arguments', 'newlines', 'library-loading' + ]), shell, + /* w should be included as it defined the package to be loaded by the library call */ + 'v\nlibrary(x)\nrequire(y)\nw <- "x"\nattachNamespace(w)\nloadNamespace("x")', ['1@v'], + 'v\nlibrary(x)\nrequire(y)\nw <- "x"\nattachNamespace(w)\nloadNamespace("x")' + ) }) }))