diff --git a/package.json b/package.json index f9e23dd6e9..6a63c7bbee 100644 --- a/package.json +++ b/package.json @@ -15,7 +15,7 @@ "build": "tsc --project .", "lint": "eslint src/ test/", "doc": "typedoc", - "test": "nyc --no-clean mocha --require ts-node/register --timeout 10000 \"test/**/*.spec.ts\"", + "test": "nyc --no-clean mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"", "performance-test": "func() { cd test/performance/ && bash run-all-suites.sh $1 $2; cd ../../; }; func", "test-full": "npm run test -- --test-installation" }, diff --git a/src/cli/statistics-app.ts b/src/cli/statistics-app.ts index dc08594249..d33e4e4dbe 100644 --- a/src/cli/statistics-app.ts +++ b/src/cli/statistics-app.ts @@ -107,7 +107,7 @@ async function getStats() { processedFeatures, allRFilesFrom(options.input, options.limit) ) - console.warn(`skipped ${stats.meta.skipped.length} requests due to errors (run with logs to get more info)`) + console.warn(`skipped ${stats.meta.failedRequests.length} requests due to errors (run with logs to get more info)`) printFeatureStatistics(stats, processedFeatures) shell.close() diff --git a/src/statistics/features/feature.ts b/src/statistics/features/feature.ts index c247e64c5c..68726d566e 100644 --- a/src/statistics/features/feature.ts +++ b/src/statistics/features/feature.ts @@ -1,3 +1,10 @@ +/** + * This module holds the definition of what a {@link Feature} that can be extracted from an R AST is. + * + * Furthermore, it contains the definition of all features that are known in {@link ALL_FEATURES}. 
+ * + * @module + */ import { assignments, comments, @@ -11,6 +18,9 @@ import { } from './supported' import { EvalOptions } from 'xpath-ts2/src/parse-api' import { MergeableRecord } from '../../util/objects' +import { NormalizedAst } from '../../r-bridge' +import { DataflowInformation } from '../../dataflow/internal/info' +import { DeepReadonly } from 'ts-essentials' /** * Maps each sub-feature name to the number of occurrences of that sub-feature. @@ -20,23 +30,47 @@ import { MergeableRecord } from '../../util/objects' */ export type FeatureInfo = Record & MergeableRecord + +/** + * The information and context that a {@link FeatureProcessor} may operate in. + */ +export interface FeatureProcessorInput extends MergeableRecord { + /** The XML Document representing the parsed (non-normalized) R AST */ + readonly parsedRAst: Document, + /** The R AST, after the normalization step */ + readonly normalizedRAst: DeepReadonly, + /** The dataflow information for the given input */ + readonly dataflow: DeepReadonly, + /** The filepath that the document originated from (if present, may be undefined if the input was provided as text) */ + readonly filepath: string | undefined +} + +/** + * A function that processes the analysis results of a document and returns the feature information. + */ +export type FeatureProcessor = (existing: T, input: FeatureProcessorInput) => T + /** * A feature is something to be retrieved by the statistics. 
* - * @typeParam T - the type of what should be collected for the feature + * @typeParam T - The type of what should be collected for the feature + * + * @see ALL_FEATURES */ export interface Feature { - /** a descriptive, yet unique name of the feature */ + /** A descriptive, yet unique name of the feature */ readonly name: string - /** a description of the feature */ + /** A description of the feature */ readonly description: string - /** a function that retrieves the feature in the document appends it to the existing feature set (we could use a monoid :D), the filepath corresponds to the active file (if any) */ - process: (existing: T, input: Document, filepath: string | undefined) => T - /** values to start the existing track from */ - initialValue() : T + /** A function that retrieves the feature from the given input and appends it to the existing feature set (we could use a monoid :D); the path of the active file (if any) is available as `input.filepath` */ + process: FeatureProcessor + /** Values to start the existing track from */ + initialValue: T } -// eslint-disable-next-line @typescript-eslint/no-explicit-any +/** + * The source of truth for all features that are supported by the statistics. 
+ */ export const ALL_FEATURES = { usedPackages: usedPackages, comments: comments, diff --git a/src/statistics/features/supported/assignments.ts b/src/statistics/features/supported/assignments.ts index 8544ab1ea1..0cd1d3e160 100644 --- a/src/statistics/features/supported/assignments.ts +++ b/src/statistics/features/supported/assignments.ts @@ -1,20 +1,18 @@ -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' -import { append } from '../../output' +import { appendStatisticsFile } from '../../output' +import { Writable } from 'ts-essentials' -export interface AssignmentInfo extends FeatureInfo { - assignmentOperator: number - nestedOperatorAssignment: number - directlyNestedOperatorAssignment: number - specialAssignmentOps: number -} -const initialAssignmentInfo = (): AssignmentInfo => ({ +const initialAssignmentInfo = { assignmentOperator: 0, specialAssignmentOps: 0, nestedOperatorAssignment: 0, directlyNestedOperatorAssignment: 0 -}) +} + +export type AssignmentInfo = Writable + const defaultOperatorAssignmentQuery: Query = xpath.parse(`//EQ_ASSIGN|//LEFT_ASSIGN|//RIGHT_ASSIGN`) // either <-/<<-/=, with a nested rhs, or ->/->> with a nested lhs @@ -66,19 +64,19 @@ export const assignments: Feature = { name: 'Assignments', description: 'all ways to assign something in R', - process(existing: AssignmentInfo, input: Document, filepath: string | undefined): AssignmentInfo { - const assignmentOperators = defaultOperatorAssignmentQuery.select({ node: input }) - const nestedOperators = nestedOperatorAssignmentQuery.select({ node: input }) - const directlyNestedOperators = directlyNestedOperatorAssignmentQuery.select({ node: input }) - const specialAssignmentOps = bracketAssignQuery.select({ node: input }).map(enrichOpForBracketAssign) + process(existing: AssignmentInfo, input: FeatureProcessorInput): AssignmentInfo { + const assignmentOperators = 
defaultOperatorAssignmentQuery.select({ node: input.parsedRAst }) + const nestedOperators = nestedOperatorAssignmentQuery.select({ node: input.parsedRAst }) + const directlyNestedOperators = directlyNestedOperatorAssignmentQuery.select({ node: input.parsedRAst }) + const specialAssignmentOps = bracketAssignQuery.select({ node: input.parsedRAst }).map(enrichOpForBracketAssign) existing.nestedOperatorAssignment += nestedOperators.length existing.directlyNestedOperatorAssignment += directlyNestedOperators.length existing.assignmentOperator += assignmentOperators.length existing.specialAssignmentOps += specialAssignmentOps.length - append(this.name, 'assignmentOperator', assignmentOperators, filepath) - append(this.name, 'specialAssignmentOps', specialAssignmentOps, filepath) + appendStatisticsFile(this.name, 'assignmentOperator', assignmentOperators, input.filepath) + appendStatisticsFile(this.name, 'specialAssignmentOps', specialAssignmentOps, input.filepath) return existing }, diff --git a/src/statistics/features/supported/comments.ts b/src/statistics/features/supported/comments.ts index 202c6d5353..5c00c280c8 100644 --- a/src/statistics/features/supported/comments.ts +++ b/src/statistics/features/supported/comments.ts @@ -1,24 +1,11 @@ -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' import { guard, isNotNull, isNotUndefined } from '../../../util/assert' -import { append } from '../../output' - -export interface CommentInfo extends FeatureInfo { - totalAmount: number - roxygenComments: number - import: number - importFrom: number - importMethodsFrom: number - importClassesFrom: number - export: number - exportClass: number - exportMethod: number - exportS3Method: number - exportPattern: number - useDynLib: number -} +import { appendStatisticsFile } from '../../output' +import { Writable } from 'ts-essentials' + -const initialCommentInfo = (): CommentInfo 
=> ({ +const initialCommentInfo = { totalAmount: 0, roxygenComments: 0, import: 0, @@ -31,7 +18,10 @@ const initialCommentInfo = (): CommentInfo => ({ exportMethod: 0, exportS3Method: 0, exportPattern: 0 -}) +} + +export type CommentInfo = Writable + const commentQuery: Query = xpath.parse('//COMMENT') @@ -54,7 +44,7 @@ const exportPatternRegex = /^'\s*@exportPattern/ function processRoxygenImport(existing: CommentInfo, commentsText: string[], filepath: string | undefined) { const packages = commentsText.map(text => importRegex.exec(text)?.groups?.package).filter(isNotUndefined) existing.import += packages.length - append(comments.name, 'import', packages, filepath, true) + appendStatisticsFile(comments.name, 'import', packages, filepath, true) } function processWithRegex(commentsText: string[], existing: CommentInfo, regex: RegExp): string[] { @@ -68,19 +58,19 @@ function processWithRegex(commentsText: string[], existing: CommentInfo, regex: function processRoxygenImportFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) { const result = processWithRegex(commentsText, existing, importFromRegex) existing.importFrom += result.length - append(comments.name, 'importFrom', result, filepath, true) + appendStatisticsFile(comments.name, 'importFrom', result, filepath, true) } function processRoxygenImportClassesFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) { const result = processWithRegex(commentsText, existing, importClassesFromRegex) existing.importClassesFrom += result.length - append(comments.name, 'importClassesFrom', result, filepath, true) + appendStatisticsFile(comments.name, 'importClassesFrom', result, filepath, true) } function processRoxygenImportMethodsFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) { const result = processWithRegex(commentsText, existing, importMethodsFrom) existing.importMethodsFrom += result.length - append(comments.name, 
'importMethodsFrom', result, filepath, true) + appendStatisticsFile(comments.name, 'importMethodsFrom', result, filepath, true) } function processExports(existing: CommentInfo, comments: string[]) { @@ -107,15 +97,15 @@ function processRoxygenUseDynLib(existing: CommentInfo, commentsText: string[], .flatMap(processMatchForDynLib) existing.useDynLib += result.length - append(comments.name, 'useDynLib', result, filepath, true) + appendStatisticsFile(comments.name, 'useDynLib', result, filepath, true) } export const comments: Feature = { name: 'Comments', description: 'All comments that appear within the document', - process(existing: CommentInfo, input: Document, filepath: string | undefined): CommentInfo { - const comments = commentQuery.select({ node: input }).map(node => node.textContent ?? '#') + process(existing: CommentInfo, input: FeatureProcessorInput): CommentInfo { + const comments = commentQuery.select({ node: input.parsedRAst }).map(node => node.textContent ?? '#') .map(text => { guard(text.startsWith('#'), `unexpected comment ${text}`) return text.slice(1) @@ -126,11 +116,11 @@ export const comments: Feature = { const roxygenComments = comments.filter(text => text.startsWith("'")) existing.roxygenComments += roxygenComments.length - processRoxygenImport(existing, roxygenComments, filepath) - processRoxygenImportFrom(existing, roxygenComments, filepath) - processRoxygenUseDynLib(existing, roxygenComments, filepath) - processRoxygenImportClassesFrom(existing, roxygenComments, filepath) - processRoxygenImportMethodsFrom(existing, roxygenComments, filepath) + processRoxygenImport(existing, roxygenComments, input.filepath) + processRoxygenImportFrom(existing, roxygenComments, input.filepath) + processRoxygenUseDynLib(existing, roxygenComments, input.filepath) + processRoxygenImportClassesFrom(existing, roxygenComments, input.filepath) + processRoxygenImportMethodsFrom(existing, roxygenComments, input.filepath) processExports(existing, roxygenComments) return 
existing diff --git a/src/statistics/features/supported/control-flow.ts b/src/statistics/features/supported/control-flow.ts index fe8518e17b..4a890ee71b 100644 --- a/src/statistics/features/supported/control-flow.ts +++ b/src/statistics/features/supported/control-flow.ts @@ -1,38 +1,28 @@ -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' -import { append } from '../../output' +import { appendStatisticsFile } from '../../output' +import { Writable } from 'ts-essentials' -export interface ControlflowInfo extends FeatureInfo { - ifThen: number - ifThenElse: number - /** can be nested with if-s or if-then-else's */ - nestedIfThen: number - nestedIfThenElse: number - /** if(TRUE), ... */ - constantIfThen: number - constantIfThenElse: number - /** if(x), ... */ - singleVariableIfThen: number - singleVariableIfThenElse: number - /** switch(...) */ - switchCase: number - singleVariableSwitchCase: number - constantSwitchCase: number -} - -const initialControlflowInfo = (): ControlflowInfo => ({ +const initialControlflowInfo = { ifThen: 0, ifThenElse: 0, + /** can be nested with if-s or if-then-else's */ nestedIfThen: 0, nestedIfThenElse: 0, + /** if(TRUE), ... */ constantIfThen: 0, constantIfThenElse: 0, + /** if(x), ... */ singleVariableIfThen: 0, singleVariableIfThenElse: 0, + /** switch(...) */ switchCase: 0, singleVariableSwitchCase: 0, constantSwitchCase: 0 -}) +} + +export type ControlflowInfo = Writable + const ifThenQuery: Query = xpath.parse(`//IF[not(following-sibling::ELSE)]`) const ifThenElseQuery: Query = xpath.parse(`//IF[following-sibling::ELSE]`) @@ -59,20 +49,20 @@ function collectForIfThenOptionalElse(existing: ControlflowInfo, name: 'IfThen' // select when condition to check if constant, ... 
const conditions = selectCondition.select({ node: ifThenOptionalElse }) - append(controlflow.name, name, conditions, filepath) + appendStatisticsFile(controlflow.name, name, conditions, filepath) - const constantKey = `constant${name}` + const constantKey = `constant${name}` as keyof ControlflowInfo const constantConditions = conditions.flatMap(c => constantCondition.select({ node: c })) existing[constantKey] += constantConditions.length - append(controlflow.name, constantKey, constantConditions, filepath) + appendStatisticsFile(controlflow.name, constantKey, constantConditions, filepath) - const singleVariableKey = `singleVariable${name}` + const singleVariableKey = `singleVariable${name}` as keyof ControlflowInfo const singleVariableConditions = conditions.flatMap(c => singleVariableCondition.select({ node: c })) existing[singleVariableKey] += singleVariableConditions.length - append(controlflow.name, singleVariableKey, singleVariableConditions, filepath) + appendStatisticsFile(controlflow.name, singleVariableKey, singleVariableConditions, filepath) - const nestedKey = `nested${name}` + const nestedKey = `nested${name}` as keyof ControlflowInfo const nestedIfThen = nestedIfThenQuery.select({ node: ifThenOptionalElse }) existing[nestedKey] += nestedIfThen.length @@ -82,33 +72,33 @@ export const controlflow: Feature = { name: 'Controlflow', description: 'Deals with if-then-else and switch-case', - process(existing: ControlflowInfo, input: Document, filepath: string | undefined): ControlflowInfo { + process(existing: ControlflowInfo, input: FeatureProcessorInput): ControlflowInfo { - const ifThen = ifThenQuery.select({ node: input }) - const ifThenElse = ifThenElseQuery.select({ node: input }) + const ifThen = ifThenQuery.select({ node: input.parsedRAst }) + const ifThenElse = ifThenElseQuery.select({ node: input.parsedRAst }) existing.ifThen += ifThen.length existing.ifThenElse += ifThenElse.length - ifThen.forEach(ifThen => { collectForIfThenOptionalElse(existing, 
'IfThen', ifThen, filepath) }) - ifThenElse.forEach(ifThenElse => { collectForIfThenOptionalElse(existing, 'IfThenElse', ifThenElse, filepath) }) + ifThen.forEach(ifThen => { collectForIfThenOptionalElse(existing, 'IfThen', ifThen, input.filepath) }) + ifThenElse.forEach(ifThenElse => { collectForIfThenOptionalElse(existing, 'IfThenElse', ifThenElse, input.filepath) }) - const switchCases = switchQuery.select({ node: input }) + const switchCases = switchQuery.select({ node: input.parsedRAst }) existing.switchCase += switchCases.length - append(controlflow.name, 'switchCase', switchCases, filepath) + appendStatisticsFile(controlflow.name, 'switchCase', switchCases, input.filepath) const constantSwitchCases = switchCases.flatMap(switchCase => constantCondition.select({ node: switchCase }) ) existing.constantSwitchCase += constantSwitchCases.length - append(controlflow.name, 'constantSwitchCase', constantSwitchCases, filepath) + appendStatisticsFile(controlflow.name, 'constantSwitchCase', constantSwitchCases, input.filepath) const variableSwitchCases = switchCases.flatMap(switchCase => singleVariableCondition.select({ node: switchCase }) ) existing.singleVariableSwitchCase += variableSwitchCases.length - append(controlflow.name, 'variableSwitchCase', variableSwitchCases, filepath) + appendStatisticsFile(controlflow.name, 'variableSwitchCase', variableSwitchCases, input.filepath) return existing }, diff --git a/src/statistics/features/supported/data-access.ts b/src/statistics/features/supported/data-access.ts index de5f5b602e..ea6273843e 100644 --- a/src/statistics/features/supported/data-access.ts +++ b/src/statistics/features/supported/data-access.ts @@ -1,24 +1,9 @@ -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' -import { append, extractNodeContent } from '../../output' - -export interface DataAccess extends FeatureInfo { - singleBracket: number - 
singleBracketEmpty: number - singleBracketConstant: number - singleBracketSingleVariable: number - singleBracketCommaAccess: number - doubleBracket: number - doubleBracketEmpty: number - doubleBracketConstant: number - doubleBracketSingleVariable: number - doubleBracketCommaAccess: number - chainedOrNestedAccess: number - byName: number - bySlot: number -} +import { appendStatisticsFile, extractNodeContent } from '../../output' +import { Writable } from 'ts-essentials' -const initialDataAccessInfo = (): DataAccess => ({ +const initialDataAccessInfo = { singleBracket: 0, singleBracketEmpty: 0, singleBracketConstant: 0, @@ -32,7 +17,10 @@ const initialDataAccessInfo = (): DataAccess => ({ chainedOrNestedAccess: 0, byName: 0, bySlot: 0 -}) +} + +export type DataAccess = Writable + const singleBracketAccess: Query = xpath.parse(`//expr/SYMBOL/../../*[preceding-sibling::OP-LEFT-BRACKET][1]`) const doubleBracketAccess: Query = xpath.parse(`//expr/SYMBOL/../../*[preceding-sibling::LBB][1]`) @@ -56,7 +44,7 @@ const commaAccess: Query = xpath.parse(`../OP-COMMA`) function processForBracketAccess(existing: DataAccess, nodes: Node[], access: 'singleBracket' | 'doubleBracket', filepath: string | undefined) { // we use the parent node to get more information in the output if applicable - append(dataAccess.name, access, nodes.map(n => n.parentNode ?? n), filepath) + appendStatisticsFile(dataAccess.name, access, nodes.map(n => n.parentNode ?? 
n), filepath) existing[access] += nodes.length const constantAccesses = nodes.flatMap(n => constantAccess.select({ node: n })) @@ -75,24 +63,24 @@ export const dataAccess: Feature = { name: 'Data Access', description: 'Ways of accessing data structures in R', - process(existing: DataAccess, input: Document, filepath: string | undefined): DataAccess { - const singleBracketAccesses = singleBracketAccess.select({ node: input }) - const doubleBracketAccesses = doubleBracketAccess.select({ node: input }) + process(existing: DataAccess, input: FeatureProcessorInput): DataAccess { + const singleBracketAccesses = singleBracketAccess.select({ node: input.parsedRAst }) + const doubleBracketAccesses = doubleBracketAccess.select({ node: input.parsedRAst }) - processForBracketAccess(existing, singleBracketAccesses, 'singleBracket', filepath) - processForBracketAccess(existing, doubleBracketAccesses, 'doubleBracket', filepath) + processForBracketAccess(existing, singleBracketAccesses, 'singleBracket', input.filepath) + processForBracketAccess(existing, doubleBracketAccesses, 'doubleBracket', input.filepath) - const namedAccesses = namedAccess.select({ node: input }) - append(dataAccess.name, 'byName', namedAccesses.map(n => n.parentNode ?? n), filepath) + const namedAccesses = namedAccess.select({ node: input.parsedRAst }) + appendStatisticsFile(dataAccess.name, 'byName', namedAccesses.map(n => n.parentNode ?? n), input.filepath) existing.byName += namedAccesses.length - const slottedAccesses = slottedAccess.select({ node: input }) - append(dataAccess.name, 'bySlot', slottedAccesses.map(n => n.parentNode ?? n), filepath) + const slottedAccesses = slottedAccess.select({ node: input.parsedRAst }) + appendStatisticsFile(dataAccess.name, 'bySlot', slottedAccesses.map(n => n.parentNode ?? 
n), input.filepath) existing.bySlot += slottedAccesses.length - const chainedOrNestedAccesses = chainedOrNestedAccess.select({ node: input }) - append(dataAccess.name, 'chainedOrNestedAccess', chainedOrNestedAccesses.map(n => n.parentNode ?? n), filepath) + const chainedOrNestedAccesses = chainedOrNestedAccess.select({ node: input.parsedRAst }) + appendStatisticsFile(dataAccess.name, 'chainedOrNestedAccess', chainedOrNestedAccesses.map(n => n.parentNode ?? n), input.filepath) existing.chainedOrNestedAccess += chainedOrNestedAccesses.length return existing diff --git a/src/statistics/features/supported/defined-functions.ts b/src/statistics/features/supported/defined-functions.ts index 1810ad5a32..5265bbeea1 100644 --- a/src/statistics/features/supported/defined-functions.ts +++ b/src/statistics/features/supported/defined-functions.ts @@ -1,33 +1,27 @@ -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' -import { append, extractNodeContent } from '../../output' +import { appendStatisticsFile, extractNodeContent } from '../../output' +import { Writable } from 'ts-essentials' export type FunctionNameInfo = string -export interface FunctionDefinitionInfo extends FeatureInfo { +const initialFunctionDefinitionInfo = { /** all, anonymous, assigned, non-assigned, ... */ - total: number - /** how many are really using OP-Lambda? */ - lambdasOnly: number - /** using `<<-`, `<-`, `=`, `->` `->>` */ - assignedFunctions: number - usedArgumentNames: number - /** anonymous functions invoked directly */ - functionsDirectlyCalled: number - nestedFunctions: number - /** functions that in some easily detectable way call themselves */ - recursive: number -} - -const initialFunctionDefinitionInfo = (): FunctionDefinitionInfo => ({ total: 0, + /** how many are really using OP-Lambda? 
*/ lambdasOnly: 0, + /** using `<<-`, `<-`, `=`, `->` `->>` */ assignedFunctions: 0, usedArgumentNames: 0, + /** anonymous functions invoked directly */ functionsDirectlyCalled: 0, nestedFunctions: 0, + /** functions that in some easily detectable way call themselves */ recursive: 0 -}) +} + +export type FunctionDefinitionInfo = Writable + // note, that this can not work with assign, setGeneric and so on for now const queryAnyFunctionDefinition: Query = xpath.parse(`//FUNCTION`) @@ -73,26 +67,26 @@ export const definedFunctions: Feature = { name: 'Defined Functions', description: 'All functions defined within the document', - process(existing: FunctionDefinitionInfo, input: Document, filepath: string | undefined): FunctionDefinitionInfo { - const allFunctions = queryAnyFunctionDefinition.select({ node: input }).length - const allLambdas = queryAnyLambdaDefinition.select({ node: input }) + process(existing: FunctionDefinitionInfo, input: FeatureProcessorInput): FunctionDefinitionInfo { + const allFunctions = queryAnyFunctionDefinition.select({ node: input.parsedRAst }).length + const allLambdas = queryAnyLambdaDefinition.select({ node: input.parsedRAst }) - append(this.name, 'allLambdas', allLambdas, filepath) + appendStatisticsFile(this.name, 'allLambdas', allLambdas, input.filepath) existing.total += allFunctions + allLambdas.length existing.lambdasOnly += allLambdas.length - const usedArgumentNames = queryUsedArgumentNames.select({ node: input }) + const usedArgumentNames = queryUsedArgumentNames.select({ node: input.parsedRAst }) existing.usedArgumentNames += usedArgumentNames.length - append(this.name, 'usedArgumentNames', usedArgumentNames, filepath) + appendStatisticsFile(this.name, 'usedArgumentNames', usedArgumentNames, input.filepath) - existing.functionsDirectlyCalled += defineFunctionsToBeCalled.select({ node: input }).length - existing.nestedFunctions += nestedFunctionsQuery.select({ node: input }).length + existing.functionsDirectlyCalled += 
defineFunctionsToBeCalled.select({ node: input.parsedRAst }).length + existing.nestedFunctions += nestedFunctionsQuery.select({ node: input.parsedRAst }).length - const assignedFunctions = queryAssignedFunctionDefinitions.select({ node: input }) + const assignedFunctions = queryAssignedFunctionDefinitions.select({ node: input.parsedRAst }) const assignedNames = assignedFunctions.map(extractNodeContent) existing.assignedFunctions += assignedFunctions.length - append(this.name, 'assignedFunctions', assignedNames, filepath) + appendStatisticsFile(this.name, 'assignedFunctions', assignedNames, input.filepath) const recursiveFunctions = [] for(let i = 0; i < assignedFunctions.length; i++) { @@ -102,7 +96,7 @@ export const definedFunctions: Feature = { } } existing.recursive += recursiveFunctions.length - append(this.name, 'recursiveFunctions', recursiveFunctions, filepath) + appendStatisticsFile(this.name, 'recursiveFunctions', recursiveFunctions, input.filepath) return existing }, diff --git a/src/statistics/features/supported/loops.ts b/src/statistics/features/supported/loops.ts index 80de4b7b3c..1de3e48b33 100644 --- a/src/statistics/features/supported/loops.ts +++ b/src/statistics/features/supported/loops.ts @@ -1,16 +1,10 @@ -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' -import { append } from '../../output' - -export interface LoopInfo extends FeatureInfo { - forLoops: number - whileLoops: number - repeatLoops: number - breakStatements: number - nextStatements: number -} +import { appendStatisticsFile } from '../../output' +import { Writable } from 'ts-essentials' + -const initialLoopInfo = (): LoopInfo => ({ +const initialLoopInfo = { forLoops: 0, whileLoops: 0, repeatLoops: 0, @@ -18,8 +12,9 @@ const initialLoopInfo = (): LoopInfo => ({ nextStatements: 0, /** apply, tapply, lapply, ...*/ implicitLoops: 0 -}) +} +export type LoopInfo = Writable const 
forLoopQuery: Query = xpath.parse(`//FOR`) const whileLoopQuery: Query = xpath.parse(`//WHILE`) @@ -39,13 +34,13 @@ export const loops: Feature = { name: 'Loops', description: 'All looping structures in the document', - process(existing: LoopInfo, input: Document, filepath: string | undefined): LoopInfo { - const forLoops = forLoopQuery.select({ node: input }) - const whileLoops = whileLoopQuery.select({ node: input }) - const repeatLoops = repeatLoopQuery.select({ node: input }) - const breakStatements = breakStatementQuery.select({ node: input }) - const nextStatements = nextStatementQuery.select({ node: input }) - const implicitLoops = implicitLoopQuery.select({ node: input }) + process(existing: LoopInfo, input: FeatureProcessorInput): LoopInfo { + const forLoops = forLoopQuery.select({ node: input.parsedRAst }) + const whileLoops = whileLoopQuery.select({ node: input.parsedRAst }) + const repeatLoops = repeatLoopQuery.select({ node: input.parsedRAst }) + const breakStatements = breakStatementQuery.select({ node: input.parsedRAst }) + const nextStatements = nextStatementQuery.select({ node: input.parsedRAst }) + const implicitLoops = implicitLoopQuery.select({ node: input.parsedRAst }) existing.forLoops += forLoops.length existing.whileLoops += whileLoops.length @@ -53,7 +48,7 @@ export const loops: Feature = { existing.breakStatements += breakStatements.length existing.nextStatements += nextStatements.length existing.implicitLoops += implicitLoops.length - append(this.name, 'implicit-loops', implicitLoops, filepath) + appendStatisticsFile(this.name, 'implicit-loops', implicitLoops, input.filepath) return existing }, diff --git a/src/statistics/features/supported/used-functions.ts b/src/statistics/features/supported/used-functions.ts index 5c2ad768eb..1197715102 100644 --- a/src/statistics/features/supported/used-functions.ts +++ b/src/statistics/features/supported/used-functions.ts @@ -1,50 +1,32 @@ import { SinglePackageInfo } from './used-packages' import { 
FunctionNameInfo } from './defined-functions' -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' -import { append, extractNodeContent } from '../../output' +import { appendStatisticsFile, extractNodeContent } from '../../output' +import { Writable } from 'ts-essentials' export interface UsedFunction { package: SinglePackageInfo, function: FunctionNameInfo } - -export interface FunctionUsageInfo extends FeatureInfo { - allFunctionCalls: number - /** abs, expm1, tanpi, ... */ - mathFunctions: number - /** nargs, missing, is.character, ... */ - programmingFunctions: number - /** browser, proc.time, gc.time, ... */ - sessionManagementFunctions: number - /** `:`, `~`, `c`, `UseMethod`, `.C`, ... */ - primitiveFunctions: number - /** e.g. do not evaluate part of functions, `quote`, ... */ - specialPrimitiveFunctions: number - /** `.Primitive`, `.Internal`, `lazyLoadDBfetch`, ... */ - internalFunctions: number - /** `body`, `environment`, `formals` */ - metaFunctions: number - /** return */ - returnFunction: number - parsingFunctions: number - editFunctions: number - assignFunctions: number - getFunctions: number - helpFunctions: number - optionFunctions: number -} - -const initialFunctionUsageInfo = (): FunctionUsageInfo => ({ +const initialFunctionUsageInfo = { allFunctionCalls: 0, + /** abs, expm1, tanpi, ... */ mathFunctions: 0, + /** nargs, missing, is.character, ... */ programmingFunctions: 0, + /** browser, proc.time, gc.time, ... */ sessionManagementFunctions: 0, + /** `:`, `~`, `c`, `UseMethod`, `.C`, ... */ primitiveFunctions: 0, + /** e.g. do not evaluate part of functions, `quote`, ... */ specialPrimitiveFunctions: 0, + /** `.Primitive`, `.Internal`, `lazyLoadDBfetch`, ... 
*/ internalFunctions: 0, + /** `body`, `environment`, `formals` */ metaFunctions: 0, + /** return */ returnFunction: 0, parsingFunctions: 0, editFunctions: 0, @@ -52,7 +34,10 @@ const initialFunctionUsageInfo = (): FunctionUsageInfo => ({ getFunctions: 0, helpFunctions: 0, optionFunctions: 0 -}) +} + +export type FunctionUsageInfo = Writable + function from(...names: string[]): RegExp { return new RegExp(names.join('|')) @@ -114,11 +99,11 @@ export const usedFunctions: Feature = { name: 'Used Functions', description: 'All functions called, split into various sub-categories', - process(existing: FunctionUsageInfo, input: Document, filepath: string | undefined): FunctionUsageInfo { - const allFunctionCalls = functionCallQuery.select({ node: input }) + process(existing: FunctionUsageInfo, input: FeatureProcessorInput): FunctionUsageInfo { + const allFunctionCalls = functionCallQuery.select({ node: input.parsedRAst }) existing.allFunctionCalls += allFunctionCalls.length - append(this.name, 'allFunctionCalls', allFunctionCalls, filepath) + appendStatisticsFile(this.name, 'allFunctionCalls', allFunctionCalls, input.filepath) const names = allFunctionCalls.map(extractNodeContent) diff --git a/src/statistics/features/supported/used-packages.ts b/src/statistics/features/supported/used-packages.ts index c8657c5aa3..555750a800 100644 --- a/src/statistics/features/supported/used-packages.ts +++ b/src/statistics/features/supported/used-packages.ts @@ -1,24 +1,12 @@ -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' import { EvalOptions } from 'xpath-ts2/src/parse-api' -import { append } from '../../output' +import { appendStatisticsFile } from '../../output' +import { Writable } from 'ts-essentials' export type SinglePackageInfo = string -export interface UsedPackageInfo extends FeatureInfo { - library: number - require: number - loadNamespace: number - 
requireNamespace: number - attachNamespace: number - withinApply: number - '::': number - ':::': number - /** just contains all occurrences where it is impossible to statically determine which package is loaded */ - '': number -} - -const initialUsedPackageInfos = (): UsedPackageInfo => ({ +const initialUsedPackageInfos = { library: 0, require: 0, loadNamespace: 0, @@ -27,9 +15,11 @@ const initialUsedPackageInfos = (): UsedPackageInfo => ({ withinApply: 0, '::': 0, ':::': 0, + /** just contains all occurrences where it is impossible to statically determine which package is loaded */ '': 0 -}) +} +export type UsedPackageInfo = Writable // based on the extraction routine of lintr search for function calls which are not character-loads (we can not trace those...) const withinApply: Query = xpath.parse(` @@ -103,27 +93,27 @@ export const usedPackages: Feature = { name: 'Used Packages', description: 'All the packages used in the code', - process(existing: UsedPackageInfo, input: Document, filepath: string | undefined): UsedPackageInfo { + process(existing: UsedPackageInfo, input: FeatureProcessorInput): UsedPackageInfo { // we will unify in the end, so we can count, group etc. 
but we do not re-count multiple packages in the same file for(const q of queries) { for(const fn of q.types) { - const nodes = q.query.select({ node: input, variables: { variable: fn } }) + const nodes = q.query.select({ node: input.parsedRAst, variables: { variable: fn } }) existing[fn] += nodes.length - append(this.name, fn, nodes, filepath, true) + appendStatisticsFile(this.name, fn, nodes, input.filepath, true) } } const nodesForVariableLoad = [ - ...packageLoadedWithVariableLoadRequire.select({ node: input }), - ...packageLoadedWithVariableNamespaces.select({ node: input }) + ...packageLoadedWithVariableLoadRequire.select({ node: input.parsedRAst }), + ...packageLoadedWithVariableNamespaces.select({ node: input.parsedRAst }) ] existing[''] += nodesForVariableLoad.length // should not be unique as variables may be repeated, and we have no idea - append(this.name, '', nodesForVariableLoad, filepath) + appendStatisticsFile(this.name, '', nodesForVariableLoad, input.filepath) - const withinApplyNodes = withinApply.select({ node: input }) + const withinApplyNodes = withinApply.select({ node: input.parsedRAst }) existing.withinApply += withinApplyNodes.length - append(this.name, 'withinApply', withinApplyNodes, filepath) + appendStatisticsFile(this.name, 'withinApply', withinApplyNodes, input.filepath) return existing }, diff --git a/src/statistics/features/supported/values.ts b/src/statistics/features/supported/values.ts index 1ecc990b92..6a321778ae 100644 --- a/src/statistics/features/supported/values.ts +++ b/src/statistics/features/supported/values.ts @@ -1,21 +1,11 @@ -import { Feature, FeatureInfo, Query } from '../feature' +import { Feature, FeatureProcessorInput, Query } from '../feature' import * as xpath from 'xpath-ts2' import { RNumHexFloatRegex } from '../../../r-bridge' import { assertUnreachable } from '../../../util/assert' -import { append } from '../../output' +import { appendStatisticsFile } from '../../output' +import { Writable } from 
'ts-essentials' -export interface ValueInfo extends FeatureInfo { - allNumerics: number, - imaginaryNumbers: number, - integers: number, - floatHex: number, - - logical: number, - specialConstants: number, - strings: number -} - -const initialValueInfo = (): ValueInfo => ({ +const initialValueInfo = { allNumerics: 0, imaginaryNumbers: 0, integers: 0, @@ -24,7 +14,10 @@ const initialValueInfo = (): ValueInfo => ({ logical: 0, specialConstants: 0, strings: 0 -}) +} + +export type ValueInfo = Writable + const numericConstantQuery: Query = xpath.parse(`//NUM_CONST`) const stringConstantQuery: Query = xpath.parse(`//STR_CONST`) @@ -55,11 +48,11 @@ export const values: Feature = { name: 'Values', description: 'All values used (as constants etc.)', - process(existing: ValueInfo, input: Document, filepath: string | undefined): ValueInfo { - const strings = stringConstantQuery.select({ node: input}) - const numerics = numericConstantQuery.select({ node: input}) - const specialConstants = specialConstantsQuery.select({ node: input}) - const specialLogicalSymbols = shortLogicalSymbolQuery.select({ node: input}) + process(existing: ValueInfo, input: FeatureProcessorInput): ValueInfo { + const strings = stringConstantQuery.select({ node: input.parsedRAst }) + const numerics = numericConstantQuery.select({ node: input.parsedRAst }) + const specialConstants = specialConstantsQuery.select({ node: input.parsedRAst }) + const specialLogicalSymbols = shortLogicalSymbolQuery.select({ node: input.parsedRAst }) const numbers: Node[] = [] numerics.map(n => [n, classifyNumericConstants(n.textContent ?? 
'', existing)] as const) @@ -81,10 +74,10 @@ export const values: Feature = { existing.specialConstants += specialConstants.length existing.logical += specialLogicalSymbols.length - append(this.name, 'numeric', numbers, filepath) - append(this.name, 'string', strings, filepath) - append(this.name, 'specialConstant', specialConstants, filepath) - append(this.name, 'logical', specialLogicalSymbols, filepath) + appendStatisticsFile(this.name, 'numeric', numbers, input.filepath) + appendStatisticsFile(this.name, 'string', strings, input.filepath) + appendStatisticsFile(this.name, 'specialConstant', specialConstants, input.filepath) + appendStatisticsFile(this.name, 'logical', specialLogicalSymbols, input.filepath) return existing }, diff --git a/src/statistics/index.ts b/src/statistics/index.ts index f1d67dbddb..cc9fbd65e9 100644 --- a/src/statistics/index.ts +++ b/src/statistics/index.ts @@ -2,3 +2,5 @@ export * from './statistics' export * from './features' export * from './output' export * from './post-process' +export { initialMetaStatistics } from './meta-statistics' +export { MetaStatistics } from './meta-statistics' diff --git a/src/statistics/meta-statistics.ts b/src/statistics/meta-statistics.ts new file mode 100644 index 0000000000..3d1b9f28f7 --- /dev/null +++ b/src/statistics/meta-statistics.ts @@ -0,0 +1,38 @@ +import { RParseRequestFromFile, RParseRequestFromText } from '../r-bridge' + +/** + * Statistics on skipped files, the time required, and more. 
+ * + * @see extractUsageStatistics + * @see initialMetaStatistics + */ +export interface MetaStatistics { + /** + * The number of requests that were parsed successfully + */ + successfulParsed: number + /** + * The processing time for each request + */ + processingTimeMs: number[] + /** + * All failed requests (e.g., if they can not be converted to XML) + */ + failedRequests: (RParseRequestFromText | RParseRequestFromFile)[] + /** + * Number of lines with each individual line length consumed for each request + */ + lines: number[][] +} + +/** + * Returns an initial {@link MetaStatistics} object, using neutral defaults (like the empty list). + */ +export function initialMetaStatistics(): MetaStatistics { + return { + successfulParsed: 0, + processingTimeMs: [], + failedRequests: [], + lines: [] + } +} diff --git a/src/statistics/output/print-stats.ts b/src/statistics/output/print-stats.ts index 3b007fb4b1..fed3659b9c 100644 --- a/src/statistics/output/print-stats.ts +++ b/src/statistics/output/print-stats.ts @@ -1,6 +1,6 @@ import { ALL_FEATURES, FeatureKey, FeatureStatistics } from '../features' -import { MetaStatistics } from '../statistics' import { ColorEffect, Colors, formatter } from './ansi' +import { MetaStatistics } from '../meta-statistics' interface MinMaxAvgMedian { sum: number, min: number, max: number, avg: number, median: number} @@ -42,7 +42,7 @@ export function printFeatureStatistics(statistics: {features: FeatureStatistics, const lineLengths = minMaxAvgAndMedian(statistics.meta.lines.flat()) const processingTimesPerFile = minMaxAvgAndMedian(statistics.meta.processingTimeMs) - console.log(`processed ${statistics.meta.successfulParsed} files (skipped ${statistics.meta.skipped.length} due to errors): + console.log(`processed ${statistics.meta.successfulParsed} files (skipped ${statistics.meta.failedRequests.length} due to errors): \ttotal processing time: ${processingTimesPerFile.sum} ms \t\tprocessing time range: 
${statsString(processingTimesPerFile, ` ms`)} \ttotal number of lines: ${lineLengths.sum} diff --git a/src/statistics/output/statistics-file.ts b/src/statistics/output/statistics-file.ts index bdd89649a0..d3ddc31bec 100644 --- a/src/statistics/output/statistics-file.ts +++ b/src/statistics/output/statistics-file.ts @@ -42,14 +42,14 @@ export interface StatisticsOutputFormat { } /** - * append the content of all nodes to the storage file for the given feature - * @param name - the name of the feature {@link Feature#name} - * @param fn - the name of the feature-aspect to record - * @param nodes - the nodes to append, you may pass already transformed string contents - * @param context - the context of the information retrieval (e.g. the name of the file that contained the R source code) - * @param unique - should duplicate entries be removed on addition + * Append the content of all nodes to the storage file for the given feature + * @param name - The name of the feature {@link Feature#name} + * @param fn - The name of the feature-aspect to record + * @param nodes - The nodes to append, you may pass already transformed string contents + * @param context - The context of the information retrieval (e.g. 
the name of the file that contained the R source code) + * @param unique - Should duplicate entries be removed on addition */ -export function append(name: string, fn: keyof T, nodes: string[] | Node[], context: string | undefined, unique = false ) { +export function appendStatisticsFile(name: string, fn: keyof T, nodes: string[] | Node[], context: string | undefined, unique = false ) { if(nodes.length === 0) { return } diff --git a/src/statistics/post-process/post-process-output.ts b/src/statistics/post-process/post-process-output.ts index 3ec7d7b657..ff6da1a6fb 100644 --- a/src/statistics/post-process/post-process-output.ts +++ b/src/statistics/post-process/post-process-output.ts @@ -49,7 +49,7 @@ function processFeatureFolder(filepath: string, feature: FeatureKey): ClusterRep const contextIdMap: ClusterContextIdMap = new DefaultMap(deterministicCountingIdGenerator()) - const featureSubKeys = Object.keys(featureInfo.initialValue()) + const featureSubKeys = Object.keys(featureInfo.initialValue) const reports: ClusterReport[] = [] for(const subKey of featureSubKeys) { const value = processFeatureSubKey(targetPath, subKey, contextIdMap) diff --git a/src/statistics/statistics.ts b/src/statistics/statistics.ts index a9cda1ab3c..6aae6443e9 100644 --- a/src/statistics/statistics.ts +++ b/src/statistics/statistics.ts @@ -1,69 +1,22 @@ import { - RShell, - retrieveXmlFromRCode, + getStoredTokenMap, RParseRequest, RParseRequestFromFile, - RParseRequestFromText + RParseRequestFromText, + RShell, + TokenMap } from '../r-bridge' -import { ALL_FEATURES, Feature, FeatureKey, FeatureSelection, FeatureStatistics } from './features' +import { ALL_FEATURES, allFeatureNames, Feature, FeatureKey, FeatureSelection, FeatureStatistics } from './features' import { DOMParser } from '@xmldom/xmldom' import fs from 'fs' import { log } from '../util/log' +import { initialMetaStatistics, MetaStatistics } from './meta-statistics' +import { SteppingSlicer } from '../core' -const parser = new 
DOMParser() - -export async function extractSingle(result: FeatureStatistics, shell: RShell, from: RParseRequest, features: 'all' | Set): Promise { - const xml = await retrieveXmlFromRCode(from, shell) - const doc = parser.parseFromString(xml, 'text/xml') - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - for(const [key, feature] of Object.entries(ALL_FEATURES) as [FeatureKey, Feature][]) { - if(features !== 'all' && !features.has(key)) { - continue - } - // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment - result[key] = feature.process(result[key], doc, from.request === 'file' ? from.content : undefined) - } - - return result -} - -export interface MetaStatistics { - /** - * the number of requests that were parsed successfully - */ - successfulParsed: number - /** - * the processing time for each request - */ - processingTimeMs: number[] - /** - * skipped requests - */ - skipped: string[] - /** - * number of lines with each individual line length consumed for each request - */ - lines: number[][] -} - -const initialMetaStatistics: () => MetaStatistics = () => ({ - successfulParsed: 0, - processingTimeMs: [], - skipped: [], - lines: [] -}) - - -function processMetaOnSuccessful(meta: MetaStatistics, request: T) { - meta.successfulParsed++ - if(request.request === 'text') { - meta.lines.push(request.content.split('\n').map(l => l.length)) - } else { - meta.lines.push(fs.readFileSync(request.content, 'utf-8').split('\n').map(l => l.length)) - } -} - +/** + * By default, {@link extractUsageStatistics} requires a generator, but sometimes you already know all the files + * that you want to process. This function simply wraps your requests as a generator. 
+ */ export function staticRequests(...requests: (RParseRequestFromText | RParseRequestFromFile)[]): AsyncGenerator { // eslint-disable-next-line @typescript-eslint/require-await return async function* () { @@ -74,7 +27,13 @@ export function staticRequests(...requests: (RParseRequestFromText | RParseReque } /** - * extract all statistic information from a set of requests using the presented R session + * Extract all wanted statistic information from a set of requests using the presented R session. + * + * @param shell - The R session to use + * @param onRequest - A callback that is called at the beginning of each request, this may be used to debug the requests. + * @param features - The features to extract (see {@link allFeatureNames}). + * @param requests - The requests to extract the features from. May generate them on demand (e.g., by traversing a folder). + * If your request is statically known, you can use {@link staticRequests} to create this generator. */ export async function extractUsageStatistics( shell: RShell, @@ -82,19 +41,16 @@ export async function extractUsageStatistics ): Promise<{ features: FeatureStatistics, meta: MetaStatistics }> { - let result = {} as FeatureStatistics - for(const key of Object.keys(ALL_FEATURES)) { - result[key as FeatureKey] = ALL_FEATURES[key as FeatureKey].initialValue() - } - + let result = initializeFeatureStatistics() const meta = initialMetaStatistics() + const tokenMap = await getStoredTokenMap(shell) let first = true for await (const request of requests) { onRequest(request) const start = performance.now() try { - result = await extractSingle(result, shell, { + result = await extractSingle(result, shell, tokenMap, { ...request, attachSourceInformation: true, ensurePackageInstalled: first @@ -103,12 +59,62 @@ export async function extractUsageStatistics(meta: MetaStatistics, request: T) { + meta.failedRequests.push(request) +} + +function processMetaOnSuccessful(meta: MetaStatistics, request: T) { + 
meta.successfulParsed++ + if(request.request === 'text') { + meta.lines.push(request.content.split('\n').map(l => l.length)) + } else { + meta.lines.push(fs.readFileSync(request.content, 'utf-8').split('\n').map(l => l.length)) + } +} +const parser = new DOMParser() + +async function extractSingle(result: FeatureStatistics, shell: RShell, tokenMap: TokenMap, request: RParseRequest, features: 'all' | Set): Promise { + const slicerOutput = await new SteppingSlicer({ + stepOfInterest: 'dataflow', + request, shell, + tokenMap + }).allRemainingSteps() + // await retrieveXmlFromRCode(from, shell) + const doc = parser.parseFromString(slicerOutput.parse, 'text/xml') + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + for(const [key, feature] of Object.entries(ALL_FEATURES) as [FeatureKey, Feature][]) { + + if(features !== 'all' && !features.has(key)) { + continue + } + + // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment + result[key] = feature.process(result[key], { + parsedRAst: doc, + dataflow: slicerOutput.dataflow, + normalizedRAst: slicerOutput.normalize, + filepath: request.request === 'file' ? 
request.content : undefined + }) + } + + return result +} diff --git a/src/util/objects.ts b/src/util/objects.ts index 669d08ee56..8172c848a1 100644 --- a/src/util/objects.ts +++ b/src/util/objects.ts @@ -33,7 +33,7 @@ export function deepMergeObject(base?: Mergeable, addon?: Mergeable): Mergeable throw new Error('illegal types for deepMergeObject!') } - const result = Object.assign({}, base) as MergeableRecord + const result: MergeableRecord = { ...base } const baseIsArray = Array.isArray(base) const addonIsArray = Array.isArray(addon) diff --git a/test/functionality/statistics/statistics.spec.ts b/test/functionality/statistics/statistics.spec.ts index a6214086a7..1f4affe0f1 100644 --- a/test/functionality/statistics/statistics.spec.ts +++ b/test/functionality/statistics/statistics.spec.ts @@ -18,7 +18,7 @@ async function expectFeature(shell: RShell, feature: T, co export function testForFeatureForInput(shell: RShell, feature: T, tests: { name: string, code: string, expected: Partial> }[]) { const featureInfo = ALL_FEATURES[feature] for(const test of tests) { - const expected = deepMergeObject(featureInfo.initialValue(), test.expected) as FeatureValue + const expected = deepMergeObject(featureInfo.initialValue, test.expected) as FeatureValue it(test.name, async() => { await expectFeature(shell, feature, test.code, expected) }) diff --git a/wiki/Interface.md b/wiki/Interface.md index 4de1adab45..65213f18b1 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -1,6 +1,6 @@ ***This wiki page is currently under construction*** -Although far from being as detailed as the in-depth explanation of [*flowR*](https://github.com/Code-Inspect/flowr/wiki/Core), this wiki page is written for programmers (knowledgeable of TypeScript) and explains how to interface and use *flowR*.<1> +Although far from being as detailed as the in-depth explanation of [*flowR*](https://github.com/Code-Inspect/flowr/wiki/Core), this wiki page explains how to interface with *flowR* in more 
detail.<1> - [💬 Communicating With the Server](#-communicating-with-the-server) @@ -15,7 +15,9 @@ Although far from being as detailed as the in-depth explanation of [*flowR*](htt - [Understanding the Steps](#understanding-the-steps) - [Benchmark the Slicer With The `BenchmarkSlicer`](#benchmark-the-slicer-with-the-benchmarkslicer) - [Augmenting the Normalization](#augmenting-the-normalization) - - [Generate Statistics with `extractUsageStatistics()`](#generate-statistics-with-extractusagestatistics) + - [Generate Statistics](#generate-statistics) + - [Extract Statistics with `extractUsageStatistics()`](#extract-statistics-with-extractusagestatistics) + - [Adding a New Feature to Extract](#adding-a-new-feature-to-extract) ## 💬 Communicating With the Server @@ -773,9 +775,78 @@ await new SteppingSlicer({ The `after` hook is called after the normalization has created the respective normalized string node, so we can be sure that the node was indeed a string! Besides incrementing the respective counter, we could return a value that the normalization should use instead (but we do not do that in this example). See the [documentation](https://code-inspect.github.io/flowr/doc/interfaces/src_r_bridge_lang_4_x_ast_parser_xml_hooks.XmlParserHooks.html) for more information. -### Generate Statistics with `extractUsageStatistics()` +### Generate Statistics **TODO: will probably change as part of the planned paper** +#### Extract Statistics with `extractUsageStatistics()` + +#### Adding a New Feature to Extract + +In this example we construct a new feature to extract, with the name "*example*". +Whenever this name appears, you may substitute this with whatever name fits your feature best (as long as the name is unique). + +1. **Create a new file in `src/statistics/features/supported`**\ + Create the file `example.ts`, and add its export to the `index.ts` file in the same directory (if not done automatically). + +2. 
**Create the basic structure**\ + To get a better feel of what a feature must have, let's look + at the basic structure (of course, due to TypeScript syntax, + there are other ways to achieve the same goal): + + ```ts + const initialExampleInfo = { + /* whatever start value is good for you */ + someCounter: 0 + } + + export type ExampleInfo = Writable<typeof initialExampleInfo> + + export const example: Feature<ExampleInfo> = { + name: 'Example Feature', + description: 'A longer example description', + + process(existing: ExampleInfo, input: FeatureProcessorInput): ExampleInfo { + /* perform analysis on the input */ + return existing + }, + + initialValue: initialExampleInfo + } + ``` + + The `initialExampleInfo` object holds the initial values for each counter that you want to maintain during the feature extraction (they will usually be initialized with 0). The resulting `ExampleInfo` type holds the structure of the data that is to be counted. Due to the vast amount of data processed, information like the name and location of a function call is not stored here, but instead written to disk (see below). + + Every new feature must be of the [`Feature<Info>`](https://github.com/Code-Inspect/flowr/tree/main/src/statistics/features/feature.ts) type, with `Info` referring to a `FeatureInfo` (like `ExampleInfo` in this example). Next to a `name` and a `description`, each Feature must provide: + + - a processor that extracts the information from the input, adding it to the existing information. + - the initial value of the information (in this case, `initialExampleInfo`). + +3. **Add it to the feature-mapping**\ + Now, in the `feature.ts` file in `src/statistics/features`, add your feature to the `ALL_FEATURES` object. + + +Now, we want to extract something. For the *example* feature created in the previous steps, we choose to count the number of `COMMENT` tokens. 
+So we define a corresponding [XPath](https://developer.mozilla.org/en-US/docs/Web/XPath) query: + +```ts +const commentQuery: Query = xpath.parse('//COMMENT') +``` + +Within our feature's `process` function, running the query is as simple as: + +```ts +const comments = commentQuery.select({ node: input.parsedRAst }) +``` + +Now we could do a lot of further processing, but for simplicity, we only record every comment found this way: + +```ts +appendStatisticsFile(example.name, 'comments', comments, input.filepath) +``` + +We use `example.name` to avoid duplication with the name that we have assigned to the feature. It corresponds to the name of the folder in the statistics output. +`'comments'` refers to a freely chosen (but unique) name that will be used as the name for the output file within the folder. The `comments` variable holds the result of the query, which is an array of nodes. Finally, we pass the `filepath` of the file that was analyzed (if known), so that it can be added to the statistics file (as additional information). + ----- <1>: For more information, see the code documentation at: <https://code-inspect.github.io/flowr/doc/>.