Skip to content

Commit

Permalink
Document and Refactor the Feature Extraction (#360)
Browse files Browse the repository at this point in the history
* refactor: add documentation for `extractUsageStatistics`

* refactor: outsource the definitions for the meta statistics from the main statistics module

* [skip ci] wip, refactor: work on failed requests during the statistic extraction

* test-fix: feature statistics recovery wants all keys

* feat, refactor: promote processor context to include dataflow

* doc: document `ALL_FEATURES`

* lint-fix: deal with linter errors

* doc, wip: start to explain the addition of new features in the wiki

* test-fix: increase the default mocha timeout to a minute

* doc: explain how to add a new feature

* refactor: remove redundant feature info type declaration

* doc: update doc to reflect the new feature info style without redundancy

* refactor: `appendStatisticsFile` instead of simply `append` for improved readability

* doc: explain how to count a feature with XPath

* lint-fix: deal with linter issues
  • Loading branch information
EagleoutIce authored Sep 29, 2023
1 parent e3d280e commit a95520e
Show file tree
Hide file tree
Showing 21 changed files with 414 additions and 340 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"build": "tsc --project .",
"lint": "eslint src/ test/",
"doc": "typedoc",
"test": "nyc --no-clean mocha --require ts-node/register --timeout 10000 \"test/**/*.spec.ts\"",
"test": "nyc --no-clean mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"",
"performance-test": "func() { cd test/performance/ && bash run-all-suites.sh $1 $2; cd ../../; }; func",
"test-full": "npm run test -- --test-installation"
},
Expand Down
2 changes: 1 addition & 1 deletion src/cli/statistics-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ async function getStats() {
processedFeatures,
allRFilesFrom(options.input, options.limit)
)
console.warn(`skipped ${stats.meta.skipped.length} requests due to errors (run with logs to get more info)`)
console.warn(`skipped ${stats.meta.failedRequests.length} requests due to errors (run with logs to get more info)`)

printFeatureStatistics(stats, processedFeatures)
shell.close()
Expand Down
50 changes: 42 additions & 8 deletions src/statistics/features/feature.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
/**
* This module holds the definition of what a {@link Feature} that can be extracted from an R AST is.
*
* Furthermore, it contains the definition of all features that are known in {@link ALL_FEATURES}.
*
* @module
*/
import {
assignments,
comments,
Expand All @@ -11,6 +18,9 @@ import {
} from './supported'
import { EvalOptions } from 'xpath-ts2/src/parse-api'
import { MergeableRecord } from '../../util/objects'
import { NormalizedAst } from '../../r-bridge'
import { DataflowInformation } from '../../dataflow/internal/info'
import { DeepReadonly } from 'ts-essentials'

/**
* Maps each sub-feature name to the number of occurrences of that sub-feature.
Expand All @@ -20,23 +30,47 @@ import { MergeableRecord } from '../../util/objects'
*/
export type FeatureInfo = Record<string, number> & MergeableRecord


/**
* The information and context that a {@link FeatureProcessor} may operate in.
*/
export interface FeatureProcessorInput extends MergeableRecord {
/** The XML Document representing the parsed (non-normalized) R AST */
readonly parsedRAst: Document,
/** The R AST, after the normalization step */
readonly normalizedRAst: DeepReadonly<NormalizedAst>,
/** The dataflow information for the given input */
readonly dataflow: DeepReadonly<DataflowInformation>,
/** The filepath that the document originated from (if present, may be undefined if the input was provided as text) */
readonly filepath: string | undefined
}

/**
* A function that processes the analysis results of a document and returns the feature information.
*/
export type FeatureProcessor<T extends FeatureInfo> = (existing: T, input: FeatureProcessorInput) => T

/**
* A feature is something to be retrieved by the statistics.
*
* @typeParam T - the type of what should be collected for the feature
* @typeParam T - The type of what should be collected for the feature
*
* @see ALL_FEATURES
*/
export interface Feature<T extends FeatureInfo> {
/** a descriptive, yet unique name of the feature */
/** A descriptive, yet unique name of the feature */
readonly name: string
/** a description of the feature */
/** A description of the feature */
readonly description: string
/** a function that retrieves the feature in the document appends it to the existing feature set (we could use a monoid :D), the filepath corresponds to the active file (if any) */
process: (existing: T, input: Document, filepath: string | undefined) => T
/** values to start the existing track from */
initialValue() : T
/** A function that retrieves the feature in the document and appends it to the existing feature set (we could use a monoid :D), the filepath corresponds to the active file (if any) */
process: FeatureProcessor<T>
/** Values to start the existing track from */
initialValue: T
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
/**
* The source of truth for all features that are supported by the statistics.
*/
export const ALL_FEATURES = {
usedPackages: usedPackages,
comments: comments,
Expand Down
32 changes: 15 additions & 17 deletions src/statistics/features/supported/assignments.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
import { Feature, FeatureInfo, Query } from '../feature'
import { Feature, FeatureProcessorInput, Query } from '../feature'
import * as xpath from 'xpath-ts2'
import { append } from '../../output'
import { appendStatisticsFile } from '../../output'
import { Writable } from 'ts-essentials'

export interface AssignmentInfo extends FeatureInfo {
assignmentOperator: number
nestedOperatorAssignment: number
directlyNestedOperatorAssignment: number
specialAssignmentOps: number
}

const initialAssignmentInfo = (): AssignmentInfo => ({
const initialAssignmentInfo = {
assignmentOperator: 0,
specialAssignmentOps: 0,
nestedOperatorAssignment: 0,
directlyNestedOperatorAssignment: 0
})
}

export type AssignmentInfo = Writable<typeof initialAssignmentInfo>


const defaultOperatorAssignmentQuery: Query = xpath.parse(`//EQ_ASSIGN|//LEFT_ASSIGN|//RIGHT_ASSIGN`)
// either <-/<<-/=, with a nested rhs, or ->/->> with a nested lhs
Expand Down Expand Up @@ -66,19 +64,19 @@ export const assignments: Feature<AssignmentInfo> = {
name: 'Assignments',
description: 'all ways to assign something in R',

process(existing: AssignmentInfo, input: Document, filepath: string | undefined): AssignmentInfo {
const assignmentOperators = defaultOperatorAssignmentQuery.select({ node: input })
const nestedOperators = nestedOperatorAssignmentQuery.select({ node: input })
const directlyNestedOperators = directlyNestedOperatorAssignmentQuery.select({ node: input })
const specialAssignmentOps = bracketAssignQuery.select({ node: input }).map(enrichOpForBracketAssign)
process(existing: AssignmentInfo, input: FeatureProcessorInput): AssignmentInfo {
const assignmentOperators = defaultOperatorAssignmentQuery.select({ node: input.parsedRAst })
const nestedOperators = nestedOperatorAssignmentQuery.select({ node: input.parsedRAst })
const directlyNestedOperators = directlyNestedOperatorAssignmentQuery.select({ node: input.parsedRAst })
const specialAssignmentOps = bracketAssignQuery.select({ node: input.parsedRAst }).map(enrichOpForBracketAssign)

existing.nestedOperatorAssignment += nestedOperators.length
existing.directlyNestedOperatorAssignment += directlyNestedOperators.length
existing.assignmentOperator += assignmentOperators.length
existing.specialAssignmentOps += specialAssignmentOps.length

append(this.name, 'assignmentOperator', assignmentOperators, filepath)
append(this.name, 'specialAssignmentOps', specialAssignmentOps, filepath)
appendStatisticsFile(this.name, 'assignmentOperator', assignmentOperators, input.filepath)
appendStatisticsFile(this.name, 'specialAssignmentOps', specialAssignmentOps, input.filepath)

return existing
},
Expand Down
52 changes: 21 additions & 31 deletions src/statistics/features/supported/comments.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,11 @@
import { Feature, FeatureInfo, Query } from '../feature'
import { Feature, FeatureProcessorInput, Query } from '../feature'
import * as xpath from 'xpath-ts2'
import { guard, isNotNull, isNotUndefined } from '../../../util/assert'
import { append } from '../../output'

export interface CommentInfo extends FeatureInfo {
totalAmount: number
roxygenComments: number
import: number
importFrom: number
importMethodsFrom: number
importClassesFrom: number
export: number
exportClass: number
exportMethod: number
exportS3Method: number
exportPattern: number
useDynLib: number
}
import { appendStatisticsFile } from '../../output'
import { Writable } from 'ts-essentials'


const initialCommentInfo = (): CommentInfo => ({
const initialCommentInfo = {
totalAmount: 0,
roxygenComments: 0,
import: 0,
Expand All @@ -31,7 +18,10 @@ const initialCommentInfo = (): CommentInfo => ({
exportMethod: 0,
exportS3Method: 0,
exportPattern: 0
})
}

export type CommentInfo = Writable<typeof initialCommentInfo>


const commentQuery: Query = xpath.parse('//COMMENT')

Expand All @@ -54,7 +44,7 @@ const exportPatternRegex = /^'\s*@exportPattern/
function processRoxygenImport(existing: CommentInfo, commentsText: string[], filepath: string | undefined) {
const packages = commentsText.map(text => importRegex.exec(text)?.groups?.package).filter(isNotUndefined)
existing.import += packages.length
append(comments.name, 'import', packages, filepath, true)
appendStatisticsFile(comments.name, 'import', packages, filepath, true)
}

function processWithRegex(commentsText: string[], existing: CommentInfo, regex: RegExp): string[] {
Expand All @@ -68,19 +58,19 @@ function processWithRegex(commentsText: string[], existing: CommentInfo, regex:
function processRoxygenImportFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) {
const result = processWithRegex(commentsText, existing, importFromRegex)
existing.importFrom += result.length
append(comments.name, 'importFrom', result, filepath, true)
appendStatisticsFile(comments.name, 'importFrom', result, filepath, true)
}

function processRoxygenImportClassesFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) {
const result = processWithRegex(commentsText, existing, importClassesFromRegex)
existing.importClassesFrom += result.length
append(comments.name, 'importClassesFrom', result, filepath, true)
appendStatisticsFile(comments.name, 'importClassesFrom', result, filepath, true)
}

function processRoxygenImportMethodsFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) {
const result = processWithRegex(commentsText, existing, importMethodsFrom)
existing.importMethodsFrom += result.length
append(comments.name, 'importMethodsFrom', result, filepath, true)
appendStatisticsFile(comments.name, 'importMethodsFrom', result, filepath, true)
}

function processExports(existing: CommentInfo, comments: string[]) {
Expand All @@ -107,15 +97,15 @@ function processRoxygenUseDynLib(existing: CommentInfo, commentsText: string[],
.flatMap(processMatchForDynLib)

existing.useDynLib += result.length
append(comments.name, 'useDynLib', result, filepath, true)
appendStatisticsFile(comments.name, 'useDynLib', result, filepath, true)
}

export const comments: Feature<CommentInfo> = {
name: 'Comments',
description: 'All comments that appear within the document',

process(existing: CommentInfo, input: Document, filepath: string | undefined): CommentInfo {
const comments = commentQuery.select({ node: input }).map(node => node.textContent ?? '#')
process(existing: CommentInfo, input: FeatureProcessorInput): CommentInfo {
const comments = commentQuery.select({ node: input.parsedRAst }).map(node => node.textContent ?? '#')
.map(text => {
guard(text.startsWith('#'), `unexpected comment ${text}`)
return text.slice(1)
Expand All @@ -126,11 +116,11 @@ export const comments: Feature<CommentInfo> = {
const roxygenComments = comments.filter(text => text.startsWith("'"))
existing.roxygenComments += roxygenComments.length

processRoxygenImport(existing, roxygenComments, filepath)
processRoxygenImportFrom(existing, roxygenComments, filepath)
processRoxygenUseDynLib(existing, roxygenComments, filepath)
processRoxygenImportClassesFrom(existing, roxygenComments, filepath)
processRoxygenImportMethodsFrom(existing, roxygenComments, filepath)
processRoxygenImport(existing, roxygenComments, input.filepath)
processRoxygenImportFrom(existing, roxygenComments, input.filepath)
processRoxygenUseDynLib(existing, roxygenComments, input.filepath)
processRoxygenImportClassesFrom(existing, roxygenComments, input.filepath)
processRoxygenImportMethodsFrom(existing, roxygenComments, input.filepath)
processExports(existing, roxygenComments)

return existing
Expand Down
64 changes: 27 additions & 37 deletions src/statistics/features/supported/control-flow.ts
Original file line number Diff line number Diff line change
@@ -1,38 +1,28 @@
import { Feature, FeatureInfo, Query } from '../feature'
import { Feature, FeatureProcessorInput, Query } from '../feature'
import * as xpath from 'xpath-ts2'
import { append } from '../../output'
import { appendStatisticsFile } from '../../output'
import { Writable } from 'ts-essentials'

export interface ControlflowInfo extends FeatureInfo {
ifThen: number
ifThenElse: number
/** can be nested with if-s or if-then-else's */
nestedIfThen: number
nestedIfThenElse: number
/** if(TRUE), ... */
constantIfThen: number
constantIfThenElse: number
/** if(x), ... */
singleVariableIfThen: number
singleVariableIfThenElse: number
/** switch(...) */
switchCase: number
singleVariableSwitchCase: number
constantSwitchCase: number
}

const initialControlflowInfo = (): ControlflowInfo => ({
const initialControlflowInfo = {
ifThen: 0,
ifThenElse: 0,
/** can be nested with if-s or if-then-else's */
nestedIfThen: 0,
nestedIfThenElse: 0,
/** if(TRUE), ... */
constantIfThen: 0,
constantIfThenElse: 0,
/** if(x), ... */
singleVariableIfThen: 0,
singleVariableIfThenElse: 0,
/** switch(...) */
switchCase: 0,
singleVariableSwitchCase: 0,
constantSwitchCase: 0
})
}

export type ControlflowInfo = Writable<typeof initialControlflowInfo>


const ifThenQuery: Query = xpath.parse(`//IF[not(following-sibling::ELSE)]`)
const ifThenElseQuery: Query = xpath.parse(`//IF[following-sibling::ELSE]`)
Expand All @@ -59,20 +49,20 @@ function collectForIfThenOptionalElse(existing: ControlflowInfo, name: 'IfThen'
// select when condition to check if constant, ...
const conditions = selectCondition.select({ node: ifThenOptionalElse })

append(controlflow.name, name, conditions, filepath)
appendStatisticsFile(controlflow.name, name, conditions, filepath)

const constantKey = `constant${name}`
const constantKey = `constant${name}` as keyof ControlflowInfo
const constantConditions = conditions.flatMap(c => constantCondition.select({ node: c }))

existing[constantKey] += constantConditions.length
append(controlflow.name, constantKey, constantConditions, filepath)
appendStatisticsFile(controlflow.name, constantKey, constantConditions, filepath)

const singleVariableKey = `singleVariable${name}`
const singleVariableKey = `singleVariable${name}` as keyof ControlflowInfo
const singleVariableConditions = conditions.flatMap(c => singleVariableCondition.select({ node: c }))
existing[singleVariableKey] += singleVariableConditions.length
append(controlflow.name, singleVariableKey, singleVariableConditions, filepath)
appendStatisticsFile(controlflow.name, singleVariableKey, singleVariableConditions, filepath)

const nestedKey = `nested${name}`
const nestedKey = `nested${name}` as keyof ControlflowInfo
const nestedIfThen = nestedIfThenQuery.select({ node: ifThenOptionalElse })

existing[nestedKey] += nestedIfThen.length
Expand All @@ -82,33 +72,33 @@ export const controlflow: Feature<ControlflowInfo> = {
name: 'Controlflow',
description: 'Deals with if-then-else and switch-case',

process(existing: ControlflowInfo, input: Document, filepath: string | undefined): ControlflowInfo {
process(existing: ControlflowInfo, input: FeatureProcessorInput): ControlflowInfo {

const ifThen = ifThenQuery.select({ node: input })
const ifThenElse = ifThenElseQuery.select({ node: input })
const ifThen = ifThenQuery.select({ node: input.parsedRAst })
const ifThenElse = ifThenElseQuery.select({ node: input.parsedRAst })

existing.ifThen += ifThen.length
existing.ifThenElse += ifThenElse.length

ifThen.forEach(ifThen => { collectForIfThenOptionalElse(existing, 'IfThen', ifThen, filepath) })
ifThenElse.forEach(ifThenElse => { collectForIfThenOptionalElse(existing, 'IfThenElse', ifThenElse, filepath) })
ifThen.forEach(ifThen => { collectForIfThenOptionalElse(existing, 'IfThen', ifThen, input.filepath) })
ifThenElse.forEach(ifThenElse => { collectForIfThenOptionalElse(existing, 'IfThenElse', ifThenElse, input.filepath) })

const switchCases = switchQuery.select({ node: input })
const switchCases = switchQuery.select({ node: input.parsedRAst })
existing.switchCase += switchCases.length
append(controlflow.name, 'switchCase', switchCases, filepath)
appendStatisticsFile(controlflow.name, 'switchCase', switchCases, input.filepath)


const constantSwitchCases = switchCases.flatMap(switchCase =>
constantCondition.select({ node: switchCase })
)
existing.constantSwitchCase += constantSwitchCases.length
append(controlflow.name, 'constantSwitchCase', constantSwitchCases, filepath)
appendStatisticsFile(controlflow.name, 'constantSwitchCase', constantSwitchCases, input.filepath)

const variableSwitchCases = switchCases.flatMap(switchCase =>
singleVariableCondition.select({ node: switchCase })
)
existing.singleVariableSwitchCase += variableSwitchCases.length
append(controlflow.name, 'variableSwitchCase', variableSwitchCases, filepath)
appendStatisticsFile(controlflow.name, 'variableSwitchCase', variableSwitchCases, input.filepath)

return existing
},
Expand Down
Loading

0 comments on commit a95520e

Please sign in to comment.