feat-fix: nested data frame reassignments (#964)

flowr-analysis · Sep 9, 2024 · af64b76 · af64b76 · github-actions · Sep 9, 2024
2 parents 953465d + b4ee94c
commit af64b76
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 1 deletion.
diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts
@@ -52,6 +52,18 @@ export interface AssignmentConfiguration extends ForceArguments {
 	readonly canBeReplacement?:    boolean
 }
 
+/**
+ * Descends through a chain of nested accesses, always following `accessed`,
+ * and yields whatever the chain bottoms out in, provided that it is a symbol.
+ *
+ * @param node - Node to start at; a node that is not an access is inspected directly.
+ * @returns The symbol at the bottom of the access chain, or `undefined` if the chain ends in any other kind of node.
+ */
+function findRootAccess<OtherInfo>(node: RNode<OtherInfo & ParentInformation>): RSymbol<OtherInfo & ParentInformation> | undefined {
+	if(node.type === RType.Access) {
+		// still inside the access chain: continue one level further in
+		return findRootAccess<OtherInfo>(node.accessed)
+	}
+	return node.type === RType.Symbol ? node : undefined
+}
+
 /**
  * Processes an assignment, i.e., `<target> <- <source>`.
  * Handling it as a function call \`&lt;-\` `(<target>, <source>)`.
@@ -95,6 +107,29 @@ export function processAssignment<OtherInfo>(
 		dataflowLogger.debug(`Assignment ${name.content} has an access as target => replacement function ${target.lexeme}`)
 		const replacement = toReplacementSymbol(target, target.operator, config.superAssignment ?? false)
 		return processAsNamedCall(replacement, data, replacement.content, [toUnnamedArgument(target.accessed, data.completeAst.idMap), ...target.access, source])
+	} else if(type === RType.Access) {
+		const rootArg = findRootAccess(target)
+		if(rootArg) {
+			const res = processKnownFunctionCall({
+				name,
+				args: 			     [rootArg, source],
+				rootId,
+				data,
+				reverseOrder: !config.swapSourceAndTarget,
+				forceArgs:    config.forceArgs
+			})
+
+			return processAssignmentToSymbol<OtherInfo & ParentInformation>({
+				...config,
+				nameOfAssignmentFunction: name.content,
+				source,
+				target:                   rootArg,
+				args:                     getEffectiveOrder(config, res.processedArguments as [DataflowInformation, DataflowInformation]),
+				rootId,
+				data,
+				information:              res.information,
+			})
+		}
 	} else if(type === RType.String) {
 		return processAssignmentToString(target, args, name, rootId, data, config, source)
 	}
@@ -114,7 +149,7 @@ function extractSourceAndTarget<OtherInfo>(args: readonly RFunctionArgument<Othe
 }
 
 function produceWrittenNodes<OtherInfo>(rootId: NodeId, target: DataflowInformation, isFunctionDef: boolean, data: DataflowProcessorInformation<OtherInfo>, makeMaybe: boolean): IdentifierDefinition[] {
-	return target.in.map(ref => ({
+	return [...target.in, ...target.unknownReferences].map(ref => ({
 		...ref,
 		kind:                isFunctionDef ? 'function' : 'variable',
 		definedAt:           rootId,

diff --git a/test/functionality/slicing/static-program-slices/calls-tests.ts b/test/functionality/slicing/static-program-slices/calls-tests.ts
@@ -688,6 +688,13 @@ print(res)`
 			assertSliced(label('Loop Re-Iterate', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'normal-definition', 'newlines', 'unnamed-arguments', 'call-normal', 'infix-calls', 'double-bracket-access', 'binary-operator', 'return', 'implicit-return']),
 				shell, code, ['7@print'], code)
 		})
+		describe('Nested dataframe assignments', () => { // regression test for assignments whose target is a nested access (`df$a[x > 3] <- 5`)
+			const code = `df <- foo()
+df$a[x > 3] <- 5
+print(df)`
+			assertSliced(label('Simple reassignment', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'normal-definition', 'newlines', 'unnamed-arguments', 'call-normal', 'infix-calls', 'double-bracket-access', 'binary-operator', 'return', 'implicit-return']), // NOTE(review): this capability list is identical to the 'Loop Re-Iterate' label; it names 'double-bracket-access', 'return' and 'implicit-return', but the snippet only uses `$` and `[` and contains no function — confirm the intended capabilities
+				shell, code, ['3@print'], code) // criterion is the `print` on line 3; `code` is passed both as input and, presumably, as the expected slice (i.e., nothing may be dropped) — TODO confirm against assertSliced
+		})
 	})
 	describe('Closures', () => {
 		assertSliced(label('closure w/ default arguments',['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'numbers', 'newlines', 'implicit-return', 'normal-definition', 'closures', 'unnamed-arguments']),
Benchmark suite	Current: `af64b76`	Previous: `d6d97d3`	Ratio
`Retrieve AST from R code`	`240.8191983636364` ms (`105.22515412407539`)	`237.1307347272727` ms (`99.33189983148677`)	`1.02`
`Normalize R AST`	`21.532465` ms (`38.98152301885162`)	`20.22688240909091` ms (`34.81944475834837`)	`1.06`
`Produce dataflow information`	`38.369665136363636` ms (`80.4249376103522`)	`38.76148281818182` ms (`83.37589845299341`)	`0.99`
`Total per-file`	`825.7434033636364` ms (`1476.3602891932335`)	`805.4951149545454` ms (`1418.989224836618`)	`1.03`
`Static slicing`	`2.1976596352317292` ms (`1.3589054283613344`)	`2.2549209922472833` ms (`1.3467440237997401`)	`0.97`
`Reconstruct code`	`0.23220752098462974` ms (`0.18848124683715506`)	`0.22440795860531199` ms (`0.17245303864543157`)	`1.03`
`Total per-slice`	`2.4458621066662363` ms (`1.4301686567610754`)	`2.4968072479681567` ms (`1.402430824638579`)	`0.98`
`failed to reconstruct/re-parse`	`0` #	`0` #	`1`
`times hit threshold`	`0` #	`0` #	`1`
`reduction (characters)`	`0.7869360165281424` #	`0.7869360165281424` #	`1`
`reduction (normalized tokens)`	`0.7639690077689504` #	`0.7639690077689504` #	`1`
`memory (df-graph)`	`147.42458274147728` KiB (`358.6827375397903`)	`147.42458274147728` KiB (`358.6827375397903`)	`1`
Benchmark suite	Current: `af64b76`	Previous: `d6d97d3`	Ratio
`Retrieve AST from R code`	`243.10358606` ms (`44.46726267978382`)	`238.45832514` ms (`44.01516969239197`)	`1.02`
`Normalize R AST`	`22.47255272` ms (`17.05977358176205`)	`21.8018916` ms (`16.153242755954302`)	`1.03`
`Produce dataflow information`	`74.59714026` ms (`88.56944668311371`)	`73.37089498` ms (`86.87821424333349`)	`1.02`
`Total per-file`	`10889.61375746` ms (`52279.57961624704`)	`10697.70673224` ms (`51757.12122600268`)	`1.02`
`Static slicing`	`21.167126531096958` ms (`78.34797815172462`)	`20.852021200476752` ms (`78.34298008513575`)	`1.02`
`Reconstruct code`	`0.2283159833575344` ms (`0.1445116475575022`)	`0.22516315606876278` ms (`0.13784797448770958`)	`1.01`
`Total per-slice`	`21.40326101255138` ms (`78.37538091943358`)	`21.08480712275338` ms (`78.36487556608114`)	`1.02`
`failed to reconstruct/re-parse`	`0` #	`0` #	`1`
`times hit threshold`	`0` #	`0` #	`1`
`reduction (characters)`	`0.8925178291385903` #	`0.8944619525615458` #	`1.00`
`reduction (normalized tokens)`	`0.8508531059077417` #	`0.8534320485134076` #	`1.00`
`memory (df-graph)`	`145.84685546875` KiB (`153.44623089940248`)	`146.770703125` KiB (`154.0029022815246`)	`0.99`