Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Document and Refactor the Feature Extraction #360

Merged
merged 17 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
a47e8e4
refactor: document for `extractUsageStatistics`
EagleoutIce Sep 27, 2023
52d8b2f
refactor: outsource the definitions for the meta statics from the mai…
EagleoutIce Sep 27, 2023
cd859ae
[skip ci] wip, refactor: work on failed requests during the statistic…
EagleoutIce Sep 27, 2023
2db3057
test-fix: feature statistics recovery wants all keys
EagleoutIce Sep 27, 2023
296372c
feat, refactor: promote processor context to include dataflow
EagleoutIce Sep 27, 2023
e9c2077
doc: document `ALL_FEATURES`
EagleoutIce Sep 27, 2023
5a53078
lint-fix: deal with linter errors
EagleoutIce Sep 27, 2023
f750793
doc, wip: start to explain the addition of new features in the wiki
EagleoutIce Sep 27, 2023
368ba03
Merge branch 'main' into 357-document-the-feature-extraction
EagleoutIce Sep 28, 2023
78d0ea6
test-fix: increase the default mocha timeout to a minute
EagleoutIce Sep 28, 2023
62e7430
Merge remote-tracking branch 'origin/357-document-the-feature-extract…
EagleoutIce Sep 28, 2023
f3baae6
doc: explain how to add a new feature
EagleoutIce Sep 29, 2023
a44793f
refactor: remove redundant feature info type declaration
EagleoutIce Sep 29, 2023
708dc08
doc: update doc to reflect the new feature info style without redundancy
EagleoutIce Sep 29, 2023
af9e95e
refactor: `appendStatisticsFile` instead of simply `append` for impro…
EagleoutIce Sep 29, 2023
c10ab15
doc: explain how to count a feature with XPath
EagleoutIce Sep 29, 2023
9f19bec
lint-fix: deal with linter issues
EagleoutIce Sep 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"build": "tsc --project .",
"lint": "eslint src/ test/",
"doc": "typedoc",
"test": "nyc --no-clean mocha --require ts-node/register --timeout 10000 \"test/**/*.spec.ts\"",
"test": "nyc --no-clean mocha --require ts-node/register --timeout 60000 \"test/**/*.spec.ts\"",
"performance-test": "func() { cd test/performance/ && bash run-all-suites.sh $1 $2; cd ../../; }; func",
"test-full": "npm run test -- --test-installation"
},
Expand Down
2 changes: 1 addition & 1 deletion src/cli/statistics-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ async function getStats() {
processedFeatures,
allRFilesFrom(options.input, options.limit)
)
console.warn(`skipped ${stats.meta.skipped.length} requests due to errors (run with logs to get more info)`)
console.warn(`skipped ${stats.meta.failedRequests.length} requests due to errors (run with logs to get more info)`)

printFeatureStatistics(stats, processedFeatures)
shell.close()
Expand Down
50 changes: 42 additions & 8 deletions src/statistics/features/feature.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
/**
* This module defines what a {@link Feature} is, i.e., something that can be extracted from an R AST.
*
* Furthermore, it contains all known features, collected in {@link ALL_FEATURES}.
*
* @module
*/
import {
assignments,
comments,
Expand All @@ -11,6 +18,9 @@ import {
} from './supported'
import { EvalOptions } from 'xpath-ts2/src/parse-api'
import { MergeableRecord } from '../../util/objects'
import { NormalizedAst } from '../../r-bridge'
import { DataflowInformation } from '../../dataflow/internal/info'
import { DeepReadonly } from 'ts-essentials'

/**
* Maps each sub-feature name to the number of occurrences of that sub-feature.
Expand All @@ -20,23 +30,47 @@ import { MergeableRecord } from '../../util/objects'
*/
export type FeatureInfo = Record<string, number> & MergeableRecord


/**
* The information and context that a {@link FeatureProcessor} may operate in.
*
* Every property is `readonly`, and the normalized AST as well as the dataflow information are
* additionally wrapped in `DeepReadonly`, so a processor is expected to only read from this input.
*/
export interface FeatureProcessorInput extends MergeableRecord {
/** The XML Document representing the parsed (non-normalized) R AST */
readonly parsedRAst: Document,
/** The R AST, after the normalization step */
readonly normalizedRAst: DeepReadonly<NormalizedAst>,
/** The dataflow information for the given input */
readonly dataflow: DeepReadonly<DataflowInformation>,
/** The path of the file that the document originated from; `undefined` if there is none (e.g., if the input was provided as text) */
readonly filepath: string | undefined
}

/**
* A function that processes the analysis results of a document and returns the feature information.
*
* It receives the feature information collected so far as `existing` and the analysis results of the
* current document as `input` (see {@link FeatureProcessorInput}); its result is the updated feature information.
*
* @typeParam T - The type of the feature information that is collected
*/
export type FeatureProcessor<T extends FeatureInfo> = (existing: T, input: FeatureProcessorInput) => T

/**
* A feature is something to be retrieved by the statistics.
*
* @typeParam T - the type of what should be collected for the feature
* @typeParam T - The type of what should be collected for the feature
*
* @see ALL_FEATURES
*/
export interface Feature<T extends FeatureInfo> {
/** a descriptive, yet unique name of the feature */
/** A descriptive, yet unique name of the feature */
readonly name: string
/** a description of the feature */
/** A description of the feature */
readonly description: string
/** a function that retrieves the feature in the document appends it to the existing feature set (we could use a monoid :D), the filepath corresponds to the active file (if any) */
process: (existing: T, input: Document, filepath: string | undefined) => T
/** values to start the existing track from */
initialValue() : T
/** A function that retrieves the feature from the given input and adds it to the existing feature information (we could use a monoid :D); the path of the active file (if any) is part of the input */
process: FeatureProcessor<T>
/** The values that the collected feature information starts with */
initialValue: T
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
/**
* The source of truth for all features that are supported by the statistics.
*/
export const ALL_FEATURES = {
usedPackages: usedPackages,
comments: comments,
Expand Down
32 changes: 15 additions & 17 deletions src/statistics/features/supported/assignments.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
import { Feature, FeatureInfo, Query } from '../feature'
import { Feature, FeatureProcessorInput, Query } from '../feature'
import * as xpath from 'xpath-ts2'
import { append } from '../../output'
import { appendStatisticsFile } from '../../output'
import { Writable } from 'ts-essentials'

export interface AssignmentInfo extends FeatureInfo {
assignmentOperator: number
nestedOperatorAssignment: number
directlyNestedOperatorAssignment: number
specialAssignmentOps: number
}

const initialAssignmentInfo = (): AssignmentInfo => ({
const initialAssignmentInfo = {
assignmentOperator: 0,
specialAssignmentOps: 0,
nestedOperatorAssignment: 0,
directlyNestedOperatorAssignment: 0
})
}

export type AssignmentInfo = Writable<typeof initialAssignmentInfo>


const defaultOperatorAssignmentQuery: Query = xpath.parse(`//EQ_ASSIGN|//LEFT_ASSIGN|//RIGHT_ASSIGN`)
// either <-/<<-/=, with a nested rhs, or ->/->> with a nested lhs
Expand Down Expand Up @@ -66,19 +64,19 @@
name: 'Assignments',
description: 'all ways to assign something in R',

process(existing: AssignmentInfo, input: Document, filepath: string | undefined): AssignmentInfo {
const assignmentOperators = defaultOperatorAssignmentQuery.select({ node: input })
const nestedOperators = nestedOperatorAssignmentQuery.select({ node: input })
const directlyNestedOperators = directlyNestedOperatorAssignmentQuery.select({ node: input })
const specialAssignmentOps = bracketAssignQuery.select({ node: input }).map(enrichOpForBracketAssign)
process(existing: AssignmentInfo, input: FeatureProcessorInput): AssignmentInfo {
const assignmentOperators = defaultOperatorAssignmentQuery.select({ node: input.parsedRAst })
const nestedOperators = nestedOperatorAssignmentQuery.select({ node: input.parsedRAst })
const directlyNestedOperators = directlyNestedOperatorAssignmentQuery.select({ node: input.parsedRAst })
const specialAssignmentOps = bracketAssignQuery.select({ node: input.parsedRAst }).map(enrichOpForBracketAssign)

Check warning on line 71 in src/statistics/features/supported/assignments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/assignments.ts#L67-L71

Added lines #L67 - L71 were not covered by tests

existing.nestedOperatorAssignment += nestedOperators.length
existing.directlyNestedOperatorAssignment += directlyNestedOperators.length
existing.assignmentOperator += assignmentOperators.length
existing.specialAssignmentOps += specialAssignmentOps.length

append(this.name, 'assignmentOperator', assignmentOperators, filepath)
append(this.name, 'specialAssignmentOps', specialAssignmentOps, filepath)
appendStatisticsFile(this.name, 'assignmentOperator', assignmentOperators, input.filepath)
appendStatisticsFile(this.name, 'specialAssignmentOps', specialAssignmentOps, input.filepath)

Check warning on line 79 in src/statistics/features/supported/assignments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/assignments.ts#L78-L79

Added lines #L78 - L79 were not covered by tests

return existing
},
Expand Down
52 changes: 21 additions & 31 deletions src/statistics/features/supported/comments.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,11 @@
import { Feature, FeatureInfo, Query } from '../feature'
import { Feature, FeatureProcessorInput, Query } from '../feature'
import * as xpath from 'xpath-ts2'
import { guard, isNotNull, isNotUndefined } from '../../../util/assert'
import { append } from '../../output'

export interface CommentInfo extends FeatureInfo {
totalAmount: number
roxygenComments: number
import: number
importFrom: number
importMethodsFrom: number
importClassesFrom: number
export: number
exportClass: number
exportMethod: number
exportS3Method: number
exportPattern: number
useDynLib: number
}
import { appendStatisticsFile } from '../../output'
import { Writable } from 'ts-essentials'


const initialCommentInfo = (): CommentInfo => ({
const initialCommentInfo = {
totalAmount: 0,
roxygenComments: 0,
import: 0,
Expand All @@ -31,7 +18,10 @@
exportMethod: 0,
exportS3Method: 0,
exportPattern: 0
})
}

export type CommentInfo = Writable<typeof initialCommentInfo>


const commentQuery: Query = xpath.parse('//COMMENT')

Expand All @@ -54,7 +44,7 @@
function processRoxygenImport(existing: CommentInfo, commentsText: string[], filepath: string | undefined) {
const packages = commentsText.map(text => importRegex.exec(text)?.groups?.package).filter(isNotUndefined)
existing.import += packages.length
append(comments.name, 'import', packages, filepath, true)
appendStatisticsFile(comments.name, 'import', packages, filepath, true)

Check warning on line 47 in src/statistics/features/supported/comments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/comments.ts#L47

Added line #L47 was not covered by tests
}

function processWithRegex(commentsText: string[], existing: CommentInfo, regex: RegExp): string[] {
Expand All @@ -68,19 +58,19 @@
function processRoxygenImportFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) {
const result = processWithRegex(commentsText, existing, importFromRegex)
existing.importFrom += result.length
append(comments.name, 'importFrom', result, filepath, true)
appendStatisticsFile(comments.name, 'importFrom', result, filepath, true)

Check warning on line 61 in src/statistics/features/supported/comments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/comments.ts#L61

Added line #L61 was not covered by tests
}

function processRoxygenImportClassesFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) {
const result = processWithRegex(commentsText, existing, importClassesFromRegex)
existing.importClassesFrom += result.length
append(comments.name, 'importClassesFrom', result, filepath, true)
appendStatisticsFile(comments.name, 'importClassesFrom', result, filepath, true)

Check warning on line 67 in src/statistics/features/supported/comments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/comments.ts#L67

Added line #L67 was not covered by tests
}

function processRoxygenImportMethodsFrom(existing: CommentInfo, commentsText: string[], filepath: string | undefined) {
const result = processWithRegex(commentsText, existing, importMethodsFrom)
existing.importMethodsFrom += result.length
append(comments.name, 'importMethodsFrom', result, filepath, true)
appendStatisticsFile(comments.name, 'importMethodsFrom', result, filepath, true)

Check warning on line 73 in src/statistics/features/supported/comments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/comments.ts#L73

Added line #L73 was not covered by tests
}

function processExports(existing: CommentInfo, comments: string[]) {
Expand All @@ -107,15 +97,15 @@
.flatMap(processMatchForDynLib)

existing.useDynLib += result.length
append(comments.name, 'useDynLib', result, filepath, true)
appendStatisticsFile(comments.name, 'useDynLib', result, filepath, true)

Check warning on line 100 in src/statistics/features/supported/comments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/comments.ts#L100

Added line #L100 was not covered by tests
}

export const comments: Feature<CommentInfo> = {
name: 'Comments',
description: 'All comments that appear within the document',

process(existing: CommentInfo, input: Document, filepath: string | undefined): CommentInfo {
const comments = commentQuery.select({ node: input }).map(node => node.textContent ?? '#')
process(existing: CommentInfo, input: FeatureProcessorInput): CommentInfo {

Check warning on line 107 in src/statistics/features/supported/comments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/comments.ts#L107

Added line #L107 was not covered by tests
const comments = commentQuery.select({ node: input.parsedRAst }).map(node => node.textContent ?? '#')
.map(text => {
guard(text.startsWith('#'), `unexpected comment ${text}`)
return text.slice(1)
Expand All @@ -126,11 +116,11 @@
const roxygenComments = comments.filter(text => text.startsWith("'"))
existing.roxygenComments += roxygenComments.length

processRoxygenImport(existing, roxygenComments, filepath)
processRoxygenImportFrom(existing, roxygenComments, filepath)
processRoxygenUseDynLib(existing, roxygenComments, filepath)
processRoxygenImportClassesFrom(existing, roxygenComments, filepath)
processRoxygenImportMethodsFrom(existing, roxygenComments, filepath)
processRoxygenImport(existing, roxygenComments, input.filepath)
processRoxygenImportFrom(existing, roxygenComments, input.filepath)
processRoxygenUseDynLib(existing, roxygenComments, input.filepath)
processRoxygenImportClassesFrom(existing, roxygenComments, input.filepath)
processRoxygenImportMethodsFrom(existing, roxygenComments, input.filepath)

Check warning on line 123 in src/statistics/features/supported/comments.ts

View check run for this annotation

Codecov / codecov/patch

src/statistics/features/supported/comments.ts#L119-L123

Added lines #L119 - L123 were not covered by tests
processExports(existing, roxygenComments)

return existing
Expand Down
64 changes: 27 additions & 37 deletions src/statistics/features/supported/control-flow.ts
Original file line number Diff line number Diff line change
@@ -1,38 +1,28 @@
import { Feature, FeatureInfo, Query } from '../feature'
import { Feature, FeatureProcessorInput, Query } from '../feature'
import * as xpath from 'xpath-ts2'
import { append } from '../../output'
import { appendStatisticsFile } from '../../output'
import { Writable } from 'ts-essentials'

export interface ControlflowInfo extends FeatureInfo {
ifThen: number
ifThenElse: number
/** can be nested with if-s or if-then-else's */
nestedIfThen: number
nestedIfThenElse: number
/** if(TRUE), ... */
constantIfThen: number
constantIfThenElse: number
/** if(x), ... */
singleVariableIfThen: number
singleVariableIfThenElse: number
/** switch(...) */
switchCase: number
singleVariableSwitchCase: number
constantSwitchCase: number
}

const initialControlflowInfo = (): ControlflowInfo => ({
const initialControlflowInfo = {
ifThen: 0,
ifThenElse: 0,
/** can be nested with if-s or if-then-else's */
nestedIfThen: 0,
nestedIfThenElse: 0,
/** if(TRUE), ... */
constantIfThen: 0,
constantIfThenElse: 0,
/** if(x), ... */
singleVariableIfThen: 0,
singleVariableIfThenElse: 0,
/** switch(...) */
switchCase: 0,
singleVariableSwitchCase: 0,
constantSwitchCase: 0
})
}

export type ControlflowInfo = Writable<typeof initialControlflowInfo>


const ifThenQuery: Query = xpath.parse(`//IF[not(following-sibling::ELSE)]`)
const ifThenElseQuery: Query = xpath.parse(`//IF[following-sibling::ELSE]`)
Expand All @@ -59,20 +49,20 @@ function collectForIfThenOptionalElse(existing: ControlflowInfo, name: 'IfThen'
// select when condition to check if constant, ...
const conditions = selectCondition.select({ node: ifThenOptionalElse })

append(controlflow.name, name, conditions, filepath)
appendStatisticsFile(controlflow.name, name, conditions, filepath)

const constantKey = `constant${name}`
const constantKey = `constant${name}` as keyof ControlflowInfo
const constantConditions = conditions.flatMap(c => constantCondition.select({ node: c }))

existing[constantKey] += constantConditions.length
append(controlflow.name, constantKey, constantConditions, filepath)
appendStatisticsFile(controlflow.name, constantKey, constantConditions, filepath)

const singleVariableKey = `singleVariable${name}`
const singleVariableKey = `singleVariable${name}` as keyof ControlflowInfo
const singleVariableConditions = conditions.flatMap(c => singleVariableCondition.select({ node: c }))
existing[singleVariableKey] += singleVariableConditions.length
append(controlflow.name, singleVariableKey, singleVariableConditions, filepath)
appendStatisticsFile(controlflow.name, singleVariableKey, singleVariableConditions, filepath)

const nestedKey = `nested${name}`
const nestedKey = `nested${name}` as keyof ControlflowInfo
const nestedIfThen = nestedIfThenQuery.select({ node: ifThenOptionalElse })

existing[nestedKey] += nestedIfThen.length
Expand All @@ -82,33 +72,33 @@ export const controlflow: Feature<ControlflowInfo> = {
name: 'Controlflow',
description: 'Deals with if-then-else and switch-case',

process(existing: ControlflowInfo, input: Document, filepath: string | undefined): ControlflowInfo {
process(existing: ControlflowInfo, input: FeatureProcessorInput): ControlflowInfo {

const ifThen = ifThenQuery.select({ node: input })
const ifThenElse = ifThenElseQuery.select({ node: input })
const ifThen = ifThenQuery.select({ node: input.parsedRAst })
const ifThenElse = ifThenElseQuery.select({ node: input.parsedRAst })

existing.ifThen += ifThen.length
existing.ifThenElse += ifThenElse.length

ifThen.forEach(ifThen => { collectForIfThenOptionalElse(existing, 'IfThen', ifThen, filepath) })
ifThenElse.forEach(ifThenElse => { collectForIfThenOptionalElse(existing, 'IfThenElse', ifThenElse, filepath) })
ifThen.forEach(ifThen => { collectForIfThenOptionalElse(existing, 'IfThen', ifThen, input.filepath) })
ifThenElse.forEach(ifThenElse => { collectForIfThenOptionalElse(existing, 'IfThenElse', ifThenElse, input.filepath) })

const switchCases = switchQuery.select({ node: input })
const switchCases = switchQuery.select({ node: input.parsedRAst })
existing.switchCase += switchCases.length
append(controlflow.name, 'switchCase', switchCases, filepath)
appendStatisticsFile(controlflow.name, 'switchCase', switchCases, input.filepath)


const constantSwitchCases = switchCases.flatMap(switchCase =>
constantCondition.select({ node: switchCase })
)
existing.constantSwitchCase += constantSwitchCases.length
append(controlflow.name, 'constantSwitchCase', constantSwitchCases, filepath)
appendStatisticsFile(controlflow.name, 'constantSwitchCase', constantSwitchCases, input.filepath)

const variableSwitchCases = switchCases.flatMap(switchCase =>
singleVariableCondition.select({ node: switchCase })
)
existing.singleVariableSwitchCase += variableSwitchCases.length
append(controlflow.name, 'variableSwitchCase', variableSwitchCases, filepath)
appendStatisticsFile(controlflow.name, 'variableSwitchCase', variableSwitchCases, input.filepath)

return existing
},
Expand Down
Loading