Skip to content

Latest commit

 

History

History
648 lines (464 loc) · 28.8 KB

plugin-infrastructure.md

File metadata and controls

648 lines (464 loc) · 28.8 KB

Plugin Project Structure

Table of contents

Plugin Project Architecture

System Overview

(Abbreviated) The architecture of the community edition is depicted in the following diagram:

Architecture Diagram

Plugin Structure

.
├── build
│   ├── analysis-config                                  // The final product generated by this configuration project
│   │   ├── plugins
│   │   │   ├── feysh-config-commercial-plugin-2.0-SNAPSHOT.zip  // Commercial checker plugin, depends on common plugin
│   │   │   ├── feysh-config-community-plugin-2.0-SNAPSHOT.zip // Community checker plugin, depends on common plugin
│   │   │   └── feysh-config-general-plugin-2.0-SNAPSHOT.zip   // Common plugin
│   │   └── rules   // Contains taint rules and other rule data; loaded and managed by the default general rule manager com.feysh.corax.config.general.rule.RuleManager, but files can also be loaded in custom formats
│   │       ├── commercial.sinks.json                            // Sinks data required by the commercial plugin
│   │       ├── clear.summaries.json                           // Summary data enumeration for java container clear methods
│   │       ├── community.sinks.json                           // Sinks data required by the community plugin
│   │       ├── general.sources.json                           // Sources data required by both plugins
│   │       ├── general.summaries.json                         // Summary data required by both plugins
│   │       ├── javaseccode.summaries.json                     // Summary (sanitizer) data for javaseccode
│   │       ├── owasp.summaries.json                           // Summary (sanitizer) data for owasp benchmark
│   │       ├── public-checkers.access-path.json               // Some data grouped by kind that some checkers depend on (access path)
│   │       ├── supplement.sinks.json                          // Supplementary sinks data
│   │       ├── supplement.sources.json                        // Supplementary sources data
│   │       └── supplement.summaries.json                      // Supplementary summary data
│   ├── ...
├── corax-config-api                                     // Project module. A thin API wrapper; the actual corax-config-api.jar is obtained from the engine
│   ├── build
│   │   ├── ...
│   └── build.gradle.kts                                       // Gradle Kotlin configuration script, empty API wrapper
├── corax-config-community                                     // Project module. Mainly contains checker definitions
│   ├── build
│   │   ├── ...
│   ├── build.gradle.kts                                       // Gradle Kotlin configuration script
│   ├── gradle.properties                                      // Gradle configuration, defines pluginId, pluginProvider, pluginDependencies
│   ├── rules                                                  // Will be automatically copied to build/analysis-config/rules
│   │   ├── community.sinks.json                               // Sinks data required by the community plugin
│   │   └── public-checkers.access-path.json                   // Some data grouped by kind required by the plugin
│   └── src                                                    // Plugin source code directory
│       ├── main
│       │   ├── kotlin                                         // Plugin source code; Kotlin and Java sources can be mixed
│       │   └── resources                                      // Plugin resources
│       └── test
│           ├── kotlin                                         // Plugin unit test source code
│           └── resources                                      // Plugin unit test resources
├── corax-config-general                                 // Project module. Defines some common classes and utilities
│   ├── build
│   │   ├── ...
│   ├── build.gradle.kts
│   ├── gradle.properties
│   ├── rules                                                   // Will be automatically copied to build/analysis-config/rules
│   │   ├── clear.summaries.json                                // Refer to the description of build/analysis-config/rules above
│   │   ├── general.sources.json
│   │   ├── general.summaries.json
│   │   ├── javaseccode.summaries.json
│   │   ├── owasp.summaries.json
│   │   ├── supplement.sources.json
│   │   └── supplement.summaries.json
│   └── src
│       ├── main
│       │   ├── kotlin                                          // Source code
│       │   └── resources                                       // Resources
│       └── test
│           ├── kotlin
│           └── resources
├── corax-config-tests                                    // Project module. Manually writing test cases, marking defective code and corresponding compliant code,
│                                                                // used to check analyzer and checker false positives and false negatives
│   ├── build
│   │   ├── ...
│   ├── build.gradle.kts
│   ├── libs                                                     // Third-party library dependencies
│   │   └── platforms                                            // Android platforms
│   │       └── android-7
│   │           └── android.jar
│   └── src
│       └── main
│           ├── java                                             // Test cases
│           └── kotlin
├── docs                                                  // Detailed documentation
│   ├── ...
├── gradle                                                // Gradle wrapper
│   └── wrapper
│       ├── ...
├── Readme.md
├── build.gradle.kts                                      // Gradle Kotlin configuration script
├── build.local.env.gradle.kts
├── gradle-local.properties                               // Manually fill in the full path to the corax_cli engine
├── gradle.properties                                     // Gradle constant configuration
├── gradlew
├── gradlew.bat
└── settings.gradle

Basic Types

Classes starting with the letter I are generally interfaces.

IStandard

​ Each standard set contains many rules (e.g., the CWE standard includes rules such as cwe-78, cwe-79, etc.), and each type of vulnerability generally corresponds to one or more rules in a standard set.

​ The following are some built-in types of standard sets:

  • BuiltinGeneralStandard.CWE
  • BuiltinGeneralStandard.CERT
  • BuiltinGeneralStandard.FEYSH

The built-in definitions are as follows; you can create a new standard set by extending the IStandard interface:

package com.feysh.corax.config.builtin.standard
/**
 * Built-in standard sets.
 *
 * Each constant supplies the [realName] and [desc] values that it overrides from [IStandard].
 */
enum class BuiltinGeneralStandard(
    override val realName: String,
    override val desc: String
) : IStandard {
    FEYSH("feysh", "feysh standard"),
    CWE("cwe", "Common Weakness Enumeration. https://cwe.mitre.org/index.html"),
    CERT("cert", "SEI CERT Oracle Coding Standard for Java. https://wiki.sei.cmu.edu/confluence/display/java"),
}

IRule

​ Objects of the IRule interface type are rules within a standard set.

​ The engine comes with several CWE rules defined as follows:

package com.feysh.corax.config.builtin.standard
/**
 * Built-in CWE rules.
 *
 * Each constant supplies the [realName] and [desc] values that it overrides from [IRule];
 * all constants share the same [standard].
 */
enum class BuiltinCWERules(
    override val realName: String,
    override val desc: String
) : IRule {
    CWE476_NULLPointer("cwe-476","NULL Pointer Dereference"),
    CWE561_DeadCode("cwe-561","Contains dead code, which can never be executed."),
    CWE563_UnusedVariable("cwe-563","Assignment to Variable without Use"),
    ;
    // Every rule declared above belongs to the BuiltinGeneralStandard.CWE standard set,
    // so e.g. BuiltinCWERules.CWE476_NULLPointer.standard evaluates to BuiltinGeneralStandard.CWE.
    override val standard: IStandard = BuiltinGeneralStandard.CWE
}

You can extend the definition of these rules as follows: CWERules.kt

IBugCategory

​ Interface definition for bug types. A bug can be described using different categories to assist in understanding the category and facilitate management. Categories can be subdivided, so a category type often includes multiple parent category types (use the IBugCategory.parent field to describe).

​ The engine comes with several BuiltinBugCategory definitions as follows:

package com.feysh.corax.config.builtin.category

import com.feysh.corax.config.api.IBugCategory


/**
 * Built-in bug categories.
 *
 * Categories form a hierarchy through [parent]: a constant with an empty [parent] set is a
 * top-level category, and any other constant names the categories it refines.
 */
enum class BuiltinBugCategory(
    override val parent: Set<IBugCategory>,
    override val realName: String,
    override val desc: String,
) : IBugCategory {

    // level 1: top-level categories (no parent)
    SECURITY(emptySet(), "Security", "A use of untrusted input in a way that could create a remotely exploitable security vulnerability."),
    PERFORMANCE(emptySet(), "Performance", "Code that is not necessarily incorrect but may be inefficient."),
    Style(emptySet(), "Dodgy Code", "Code that is confusing, anomalous, or written in a way that leads itself to errors."),
    EXPERIMENTAL(emptySet(), "Experimental", "Experimental and not fully vetted bug patterns"),
    Quality(emptySet(), "Quality", "low-quality code"),

    // level 2: children of SECURITY
    Injection(setOf(SECURITY), "Injection", "Injection flaws are very prevalent, particularly in legacy code. Injection vulnerabilities are often found in SQL, LDAP, XPath, or NoSQL queries, OS commands, XML parsers, SMTP headers, expression languages, and ORM queries. Injection flaws are easy to discover when examining code."),
    SensitiveDataExposure(setOf(SECURITY), "unencrypted sensitive data exposure", ""),
    Misuse(setOf(SECURITY), "misuse", ""),

    // level 2: children of Quality (their parent is itself a level-1 category)
    NullPointer(setOf(Quality), "null pointer",""),
    DeadCode(setOf(Quality), "dead code",""),
    DefineUnused(setOf(Quality), "define unused",""),
    DeadStore(setOf(Quality), "dead store",""),
    ;
}

You can extend the definition of these categories as follows: BugCategory.kt

IChecker

​ Defines a checker for inspecting a series of similar issues. A checker can contain multiple CheckTypes.

​ For example, using Kotlin object to define a checker named SqliChecker that implements the IChecker interface:

// Example checker declaration (abbreviated: the nested CheckType definitions are elided below).
object SqliChecker : IChecker {
    override val report: IRule = CWERules.CWE89_SQLI   // The bug rule name used in the final report is derived from this field
    override val category: IBugCategory = BugCategory.Sqli  // The bug category name used in the final report is derived from this field
    override val standards: Set<IRule> = setOf(             // Specify which common standards the checker belongs to
        CWERules.CWE89_SQLI,
    )
    
    .CheckTypes ...
}

You can extend the definition of this checker by referring to CheckerDeclarations.kt.

CheckType

​ Generally defined within the Checker's internal structure, as shown in the code below:

// Example of a CheckType nested inside its checker (abbreviated: further CheckTypes are elided below).
object SqliChecker : IChecker {

    // CheckType
    object SqlInjectionTyped : CheckType() {  // Inherits from the CheckType class
        // One message template per report language; `callee` and `args` are resolved when the template is evaluated.
        override val bugMessage: Map<Language, BugMessage> = mapOf(  // Define key information in both Chinese (ZH) and English (EN) for the report
            Language.ZH to msgGenerator { "Using `$callee` in ${args["type"]} may be vulnerable to SQL injection" },
            Language.EN to msgGenerator { "This use of `$callee` can be vulnerable to SQL injection in the ${args["type"]}" }
        )
        override val checker: IChecker = SqliChecker // Specify which checker this CheckType belongs to, generally the outer class (SqliChecker)
    }
    
    CheckTypes...
 }

​ You can extend the definition of this checker by referring to CheckerDeclarations.kt.

Language

​ Defines the language for the report. Currently, only Chinese (Language.ZH) and English (Language.EN) are supported. Both languages are present in the report. If needed, further customization for the report output language can be implemented.

BugMessage

​ Generates key information for the report using the msgGenerator Kotlin method.

​ For instance, for the given Java code method calls:

// Example caller class: the comments inside containerA() describe how each call site
// appears to the analyzer (containing method, invocation statement, invocation
// expression, and the target method of the call edge).
class Foo {
    void containerA() {
        
        // Assuming the actual invocation is <Bar: String calleeB()>
        // Method container: <Foo: void containerA()>
        // Invocation Stmt: jimple ir: $r = virtualInvoke $this.<Foo: Object calleeB()>()
        // Invocation expression: virtualInvoke $this.<Foo: Object calleeB()>()
        // Target method in the invocation edge: <Bar: String calleeB()>, not <Foo: Object calleeB()>
        Object r = calleeB(); 
        
        
        // Assuming the actual invocation is <Foo: java.sql.Statement JdbcSqlSinkA(java.lang.Object)>
        // Method container: <Foo: void containerA()>
        // Invocation Stmt: jimple ir: $r = virtualInvoke $this.<Foo: java.sql.Statement JdbcSqlSinkA(java.lang.Object)>()
        // Invocation expression: virtualInvoke $this.<Foo: java.sql.Statement JdbcSqlSinkA(java.lang.Object)>()
        // Target method in the invocation edge: <Foo: java.sql.Statement JdbcSqlSinkA(java.lang.Object)>
        JdbcSqlSinkA(r);
    }
    
    // First declaration of calleeB(): Foo has no supertype that declares it,
    // so it must not carry @Override (javac rejects @Override on a non-overriding method).
    Object calleeB(){
        ...
    }
}

// Subclass of Foo whose calleeB() overrides the inherited method with a narrower (String) return type.
class Bar extends Foo {
    @Override 
    String calleeB(){
        ...
    }
}
/**
 * Report message template.
 *
 * [msg] is a lambda with an [Env] receiver that returns the message text, so the template can
 * refer to the properties of [Env] directly (for example `callee` or `args`).
 */
data class BugMessage(val msg: Env.() -> String) {
    /** Values a message template can read; every location-specific property is nullable. */
    interface Env {
        val container: SootMethod?     // Method container
        val callSite: soot.Unit?       // Invocation statement
        val invokeExpr: InvokeExpr?    // Invocation expression
        val callee: SootMethod?        // Target method in the invocation edge

        val clazz: SootClass?          // Filled in by the java class check defined in PreAnalysisUnit.IClassCheckPoint
        val method: SootMethod?        // Filled in by the java method check defined in PreAnalysisUnit.IMethodCheckPoint
        val field: SootField?          // Filled in by the java field check defined in PreAnalysisUnit.IFieldCheckPoint
        val fileName: String?          // Filled in by the resource file check defined in PreAnalysisUnit.ISourceFileCheckPoint

        var lineNumber: Int            // Line number of the bug
        var columnNumber: Int          // Column number of the bug

        val args: MutableMap<Any, Any> // Custom data from the checker unit can be stored in this args map
    }
}

/** Wraps the template lambda [msg] in a [BugMessage]. */
fun msgGenerator(msg: BugMessage.Env.() -> String): BugMessage {
    return BugMessage(msg)
}

​ In the event of reporting a bug at the specified location, use the following Kotlin code:

msgGenerator { "Using ${args["type"]} in $callee may be vulnerable to SQL injection"}

​ This will generate the corresponding message: "Using JdbcSqlSink in <Foo: java.sql.Statement JdbcSqlSinkA(java.lang.Object)> may be vulnerable to SQL injection"

SAOptions

​ Facilitates the external configuration of this object through a custom serializable class. Refer to Readme.md#Compilation-and-Build for details. Open the generated YAML configuration file, find the options field in the SSLContext unit, modify the configuration, and save. The analyzer will automatically deserialize this YAML file into an SSLContext.Options and fill in the SSLContext.options field during the next analysis. Then, the configurations in options.riskAlgorithm become effective when calling the AIAnalysisUnit.config method.

/**
 * Example analysis unit whose settings are held in a serializable [Options] object.
 */
object SSLContext : AIAnalysisUnit() { // Must inherit from CheckerUnit
    
    /** Externally configurable settings of this unit. */
    @Serializable // Kotlin serialization annotation to ensure automatic serialization and deserialization of this class
    class Options : SAOptions {
        // Default configuration data. The first entry is spelled "SSL" (upper case) to match the
        // standard SSLContext algorithm name; the previous spelling "SSl" differed in case.
        val riskAlgorithm = listOf("SSL", "SSLv2", "SSLv3", "TLS", "TLSv1", "TLSv1.1")
    }

    private var options: Options = Options() // Must be set as var, cannot use the Kotlin val keyword or add the Java final modifier
    
    context (AIAnalysisApi)
    override fun config() {
        options.riskAlgorithm // Read customizable configuration externally
    }
}

​ Refer to weak-ssl.kt for the complete code.

IMethodMatch

Matches multiple soot.SootMethod instances in a soot.Scene based on key information about methods (such as the class name, method name, parameter types, and parameter count).

SootSignatureMatch

Built-in method to match a unique soot.SootMethod based on the Soot method signature.

Example: matchSoot(sootSignature="<Foo: Object calleeB()>") would yield a single soot.SootMethod: <Foo: Object calleeB()> (if such a method exists).

RawSignatureMatch

Built-in method to fuzzily match one or more corresponding soot.SootMethod instances based on the Soot method signature.

  • * indicates matching any type.
  • ** indicates matching 0 to many types.

Example: matchSimpleSig(sig="java.lang.String: * <init>(char[],*,**)") can match

0 = "<java.lang.String: void <init>(char[],int)>"
1 = "<java.lang.String: void <init>(char[],int,int)>"
2 = "<java.lang.String: void <init>(char[],int,int,java.lang.Void)>"

Example: matchSimpleSig(sig="java.lang.String: * <init>(char[],**)") can match:

0 = "<java.lang.String: void <init>(char[])>"
1 = "<java.lang.String: void <init>(char[],int)>"
2 = "<java.lang.String: void <init>(char[],int,int)>"
3 = "<java.lang.String: void <init>(char[],int,int,java.lang.Void)>"

​ As you can see from these four soot signatures, the differentiation lies only in the type and number of parameters. It's challenging to understand the purpose of each parameter, making it difficult for both users and developers to comprehend the method's usage and, consequently, to write summaries and checks.

Simple signatures support variable names for better understanding of the method's meaning. The format is short type name + space + variable name. It is recommended to include variable names.

For example:

javax.net.ssl.SSLSocket: void setEnabledProtocols(String[] protocols)
java.lang.String: void <init>(char value[], int offset, int count)

Additionally, soot signatures do not support variable names. This may be supported in the future, or a custom format can be defined.

IMethodGrouped

The complete source code is defined in rule/RuleDeclares.kt.

It is an interface that inherits from IMethodSignature and includes a field group: String. This represents a group kind along with a method's signature { signature, subtypes, provenance, ext }.

For instance, the following rule is a combination of a kind field and a complete IMethodSignature, forming an IMethodGrouped:

{"kind":"content-provider","signature":"<javax.swing.text.JTextComponent: java.lang.String getText(int,int)>","subtypes":true,"provenance":"manual","ext":""}

An example file containing such rules is general.sources.json.

RuleManager

The complete source code is defined in rule/RuleManager.kt.

It is a configuration file manager based on kotlinx-serialization for easy loading, parsing files, and serializing some objects to files.

In model/ConfigCenter.kt, you can find code like this:

/**
 * Grouped access-path rules, built on first access and then reused.
 */
val methodAccessPathDataBase: GroupedMethodsManager<MethodAccessPath> by lazy {
    // Collect every file under the configuration directories whose name ends with "access-path.json".
    val accessPathJsonFiles = walkFiles(getConfigDirectories()) { candidate ->
        candidate.name.endsWith("access-path.json")
    }
    // Deserialize the collected files with the MethodAccessPath serializer; the result is the lazy value.
    GroupedMethodsManager.load<MethodAccessPath>(accessPathJsonFiles, serializer = serializer())
}

You can use a similar approach to conveniently customize loading and saving data in any format by defining a Kotlin serializable data class.

ConfigCenter

The complete source code is defined in model/ConfigCenter.kt.

It is used to load files from build/analysis-config/rules, including taint (source, sink, summary) json configuration files and access-path.json configuration files.

These correspond to ConfigCenter.taintRulesManager and ConfigCenter.methodAccessPathDataBase, respectively.

CheckerUnit

Tips: For more detailed examples, refer to Custom Checker.

It is a base class for a checker unit, divided into PreAnalysisUnit and AIAnalysisUnit.

CheckerUnit

PreAnalysisUnit

A type of checker definition unit primarily responsible for checking and parsing resources. The engine provides built-in capabilities such as:

  • Project resources (e.g., .txt, .xml, .config, .java, .yml, .key, etc...) content, customizable
  • SootClass, SootField, SootMethod jimple IR, Annotations checks
  • Java method call checks (call edge)
  • Java class AST match (javaparser)
PreAnalysisApi

It provides the following checker interfaces:

// Illustrative listing of what is available inside PreAnalysisUnit.config() under PreAnalysisApi.
// It is an overview, not compilable code: arguments such as `extension = ?` are placeholders
// and the `fun` lines only show signatures.
object ExampleCheckerName : PreAnalysisUnit() {
    context(PreAnalysisApi)
    override fun config() {
        val cg: CallGraph                // Call graph, precision is influenced by --call-graph-algorithm, PAG can be obtained through Scene.v().pointsToAnalysis
        runInScene {}                    // Callback invoked, provides a complete Soot scene environment (call graph, pointsToGraph, etc.), can implement special checks in the callback
        atAnyClass { }                   // Traverse all SootClasses
        atAnyField { }                   // Traverse all SootFields
        atAnyMethod { }                  // Traverse all SootMethods
        atAnyInvoke { }                  // Traverse all Java method invocation points (call edge)
        atAnySourceFile(extension = ?) { }  // Traverse all resource files with a specific extension
        
        atClass(clazz) { }               // Match the specified SootClass
        atField(field) { }               // Match the specified SootField
        atMethod(method) { }             // Match the specified SootMethod
        atInvoke(callee) { }             // Return the invocation points (call edge) that call the callee method
        
        // Defect report submission
        fun ICheckPoint.report(checkType: CheckType, env: BugMessage.Env.() -> Unit = { })
        fun report(checkType: CheckType, atClass: SootClass, startLineNumber: Int, columnNumber: Int, env: BugMessage.Env.() -> Unit = {})
        fun report(checkType: CheckType, file: Path, startLineNumber: Int, columnNumber: Int, env: BugMessage.Env.() -> Unit = {})
    }
}

AIAnalysisUnit

An abstract interpretation analysis custom check unit, mainly checking the semantics of analyzed code. The engine provides APIs to construct custom checkers:

  • Modeling descriptions of methods, extending some properties and binding them to objects
  • Bug checking
AIAnalysisApi
// Illustrative listing of the registration calls available inside AIAnalysisUnit.config()
// under AIAnalysisApi. The arguments (KMethod, classMatch, fieldMatch, ...) are placeholders.
object IbatisAnnotationSQLSinks : AIAnalysisUnit() {

    context(AIAnalysisApi)
    override fun config() {
        beforeConfig{}           // Called before configuration begins
        eachClass{}              // Traverse and handle all SootClasses
        eachField{}              // Traverse and handle all SootFields
        eachMethod{}             // Traverse and handle all SootMethods
        eachLocalVariable{}      // Traverse and handle all local variables in SootMethod
        
        constructor(KMethod) {}    // Handle a constructor
        method(KMethod) {}         // Handle a member method
        staticMethod(KMethod) {}   // Handle a static method
        
        clazz(classMatch) {}       // Handle matching specified class
        field(fieldMatch) {}       // Handle matching specified class member field
        method(methodMatch) {}     // Handle matching specified class method
        localVar(localVarMatch) {} // Handle matching local variables in the specified SootMethod
        afterConfig{}              // Called after configuration ends
    }
}

ISootInitializeHandler

Used to customize the configuration of the soot Scene-related parameters.

Two built-in objects are provided:

image-20230719135238363

Source code:

package com.feysh.corax.config.builtin.soot

import com.feysh.corax.config.api.ISootInitializeHandler

/**
 * Handler that overrides nothing from [ISootInitializeHandler].
 * NOTE(review): presumably this leaves the Soot options at whatever the interface's defaults do — confirm against the API.
 */
@Suppress("unused")
object EmptySootConfiguration: ISootInitializeHandler

/**
 * Soot scene configuration that pushes a class exclusion list and a set of
 * per-phase options into the Soot options object.
 */
object DefaultSootConfiguration : ISootInitializeHandler {

    /** Serializable settings: the exclusion patterns and the phase-name-to-option map. */
    @Serializable
    class CustomOptions : SAOptions {
        val excludeList: List<String> = listOf(
            "java.*",
            "javax.*",
            "jdk.*",

            "com.apple.*",
            "apple.awt.*",
            "org.w3c.*",
            "org.xml.*",
            "com.sun.*",
            "sun.*",

            // Excluding android.* classes causes layout class loading issues for callback analysis
            // that is based on layout files.
            // 2020-07-26 (SA): the exclusion was added back because removing it breaks calls to
            // Android SDK stubs. A proper test case for the layout file issue is still needed
            // before deciding how to deal with it.
            "android.*",
            "androidx.*",

            // Logger
            "org.slf4j.*",
            "org.apache.log4j.*",
            "org.apache.logging.*",
            "java.util.logging.*",
            "ch.qos.logback.*",
            "com.mysql.*",

            // "org.apache.*",
            "org.eclipse.*",
            "soot.*"
        )

        val sootPhaseNameToOption: Map<String, String> = mapOf(
            "jb.sils" to "enabled:false", // null_type bug
            "jb.tr" to "ignore-nullpointer-dereferences:true" // don't replace to npe
        )
    }

    var options: CustomOptions = CustomOptions()

    /**
     * Applies the current [CustomOptions] to the given Soot [options] object.
     * Note that the parameter shadows the property of the same name, hence `this.options`.
     */
    override fun configure(options: Options) {
        val settings = this.options

        // Hand Soot its own copy of the exclusion patterns. For excluded classes Soot loads only
        // the signatures, not the method bodies: scanning gets faster and uninteresting classes
        // are skipped, at the possible cost of analysis accuracy.
        options.set_exclude(LinkedList(settings.excludeList))

        // Forward each configured phase option.
        settings.sootPhaseNameToOption.forEach { (phaseName, opt) ->
            options.setPhaseOption(phaseName, opt)
        }
    }
}

DefaultSootConfiguration is the default soot scene configuration for the analyzer. If not manually specified, the analyzer will default to using this configuration.

Complete example: com.feysh.corax.config.builtin.soot.DefaultSootConfiguration