Skip to content

Commit

Permalink
feat-fix: nested data frame reassignments (#964)
Browse files Browse the repository at this point in the history
  • Loading branch information
EagleoutIce committed Sep 9, 2024
2 parents 953465d + b4ee94c commit af64b76
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ export interface AssignmentConfiguration extends ForceArguments {
readonly canBeReplacement?: boolean
}

/**
 * Walks down a (possibly nested) access expression to the node that is ultimately accessed.
 *
 * Starting at `node`, every node of type {@link RType.Access} is replaced by its `accessed` child
 * until a node of another type is reached.
 *
 * @param node - the node to start the descent from (it does not have to be an access itself)
 * @returns the innermost accessed node if it is a symbol, `undefined` for any other node type
 */
function findRootAccess<OtherInfo>(node: RNode<OtherInfo & ParentInformation>): RSymbol<OtherInfo & ParentInformation> | undefined {
	for(let cursor = node; ; cursor = cursor.accessed) {
		// the first non-access node ends the descent and decides the result
		if(cursor.type !== RType.Access) {
			return cursor.type === RType.Symbol ? cursor : undefined
		}
	}
}

/**
* Processes an assignment, i.e., `<target> <- <source>`.
* It is handled as the function call `` `<-`(<target>, <source>) ``.
Expand Down Expand Up @@ -95,6 +107,29 @@ export function processAssignment<OtherInfo>(
dataflowLogger.debug(`Assignment ${name.content} has an access as target => replacement function ${target.lexeme}`)
const replacement = toReplacementSymbol(target, target.operator, config.superAssignment ?? false)
return processAsNamedCall(replacement, data, replacement.content, [toUnnamedArgument(target.accessed, data.completeAst.idMap), ...target.access, source])
} else if(type === RType.Access) {
const rootArg = findRootAccess(target)
if(rootArg) {
const res = processKnownFunctionCall({
name,
args: [rootArg, source],
rootId,
data,
reverseOrder: !config.swapSourceAndTarget,
forceArgs: config.forceArgs
})

return processAssignmentToSymbol<OtherInfo & ParentInformation>({
...config,
nameOfAssignmentFunction: name.content,
source,
target: rootArg,
args: getEffectiveOrder(config, res.processedArguments as [DataflowInformation, DataflowInformation]),
rootId,
data,
information: res.information,
})
}
} else if(type === RType.String) {
return processAssignmentToString(target, args, name, rootId, data, config, source)
}
Expand All @@ -114,7 +149,7 @@ function extractSourceAndTarget<OtherInfo>(args: readonly RFunctionArgument<Othe
}

function produceWrittenNodes<OtherInfo>(rootId: NodeId, target: DataflowInformation, isFunctionDef: boolean, data: DataflowProcessorInformation<OtherInfo>, makeMaybe: boolean): IdentifierDefinition[] {
return target.in.map(ref => ({
return [...target.in, ...target.unknownReferences].map(ref => ({
...ref,
kind: isFunctionDef ? 'function' : 'variable',
definedAt: rootId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,13 @@ print(res)`
assertSliced(label('Loop Re-Iterate', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'normal-definition', 'newlines', 'unnamed-arguments', 'call-normal', 'infix-calls', 'double-bracket-access', 'binary-operator', 'return', 'implicit-return']),
shell, code, ['7@print'], code)
})
// Slicing test for an assignment whose target is a nested access (`df$a[x > 3] <- 5`).
// The slicing criterion is the `print` call on line 3; the same `code` string is passed as the last
// argument (presumably the expected slice), i.e., all three lines are expected to be kept.
describe('Nested dataframe assignments', () => {
const code = `df <- foo()
df$a[x > 3] <- 5
print(df)`
// NOTE(review): the capability list is identical to the one used by the 'Loop Re-Iterate' test right above;
// it names 'double-bracket-access', 'return' and 'implicit-return', although this snippet only uses
// `$` and single-bracket access and contains no function definition — confirm and adjust the ids.
assertSliced(label('Simple reassignment', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'normal-definition', 'newlines', 'unnamed-arguments', 'call-normal', 'infix-calls', 'double-bracket-access', 'binary-operator', 'return', 'implicit-return']),
shell, code, ['3@print'], code)
})
})
describe('Closures', () => {
assertSliced(label('closure w/ default arguments',['name-normal', ...OperatorDatabase['<-'].capabilities, 'formals-default', 'numbers', 'newlines', 'implicit-return', 'normal-definition', 'closures', 'unnamed-arguments']),
Expand Down

2 comments on commit af64b76

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"artificial" Benchmark Suite

Benchmark suite Current: af64b76 Previous: d6d97d3 Ratio
Retrieve AST from R code 240.8191983636364 ms (105.22515412407539) 237.1307347272727 ms (99.33189983148677) 1.02
Normalize R AST 21.532465 ms (38.98152301885162) 20.22688240909091 ms (34.81944475834837) 1.06
Produce dataflow information 38.369665136363636 ms (80.4249376103522) 38.76148281818182 ms (83.37589845299341) 0.99
Total per-file 825.7434033636364 ms (1476.3602891932335) 805.4951149545454 ms (1418.989224836618) 1.03
Static slicing 2.1976596352317292 ms (1.3589054283613344) 2.2549209922472833 ms (1.3467440237997401) 0.97
Reconstruct code 0.23220752098462974 ms (0.18848124683715506) 0.22440795860531199 ms (0.17245303864543157) 1.03
Total per-slice 2.4458621066662363 ms (1.4301686567610754) 2.4968072479681567 ms (1.402430824638579) 0.98
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.7869360165281424 # 0.7869360165281424 # 1
reduction (normalized tokens) 0.7639690077689504 # 0.7639690077689504 # 1
memory (df-graph) 147.42458274147728 KiB (358.6827375397903) 147.42458274147728 KiB (358.6827375397903) 1

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"social-science" Benchmark Suite

Benchmark suite Current: af64b76 Previous: d6d97d3 Ratio
Retrieve AST from R code 243.10358606 ms (44.46726267978382) 238.45832514 ms (44.01516969239197) 1.02
Normalize R AST 22.47255272 ms (17.05977358176205) 21.8018916 ms (16.153242755954302) 1.03
Produce dataflow information 74.59714026 ms (88.56944668311371) 73.37089498 ms (86.87821424333349) 1.02
Total per-file 10889.61375746 ms (52279.57961624704) 10697.70673224 ms (51757.12122600268) 1.02
Static slicing 21.167126531096958 ms (78.34797815172462) 20.852021200476752 ms (78.34298008513575) 1.02
Reconstruct code 0.2283159833575344 ms (0.1445116475575022) 0.22516315606876278 ms (0.13784797448770958) 1.01
Total per-slice 21.40326101255138 ms (78.37538091943358) 21.08480712275338 ms (78.36487556608114) 1.02
failed to reconstruct/re-parse 0 # 0 # 1
times hit threshold 0 # 0 # 1
reduction (characters) 0.8925178291385903 # 0.8944619525615458 # 1.00
reduction (normalized tokens) 0.8508531059077417 # 0.8534320485134076 # 1.00
memory (df-graph) 145.84685546875 KiB (153.44623089940248) 146.770703125 KiB (154.0029022815246) 0.99

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.