Merge pull request #822 from Mulgish/feature/veracode-improvements

Improve Veracode parser
jenkinsci · Aug 9, 2022 · 5aea564 · 5aea564
2 parents 3ec8fdf + 6139f7a
commit 5aea564
Show file tree

Hide file tree

Showing 3 changed files with 155 additions and 18 deletions.
diff --git a/src/main/java/edu/hm/hafner/analysis/parser/VeraCodePipelineScannerParser.java b/src/main/java/edu/hm/hafner/analysis/parser/VeraCodePipelineScannerParser.java
@@ -1,5 +1,6 @@
 package edu.hm.hafner.analysis.parser;
 
+import org.apache.commons.lang3.StringUtils;
 import org.json.JSONArray;
 import org.json.JSONObject;
 
@@ -9,6 +10,8 @@
 import edu.hm.hafner.analysis.Severity;
 
 import static j2html.TagCreator.*;
+import static org.apache.commons.lang3.StringUtils.startsWith;
+import static org.apache.commons.lang3.StringUtils.endsWith;
 
 /**
  * Parser for Veracode Pipeline Scanner (pipeline-scan) tool.
@@ -40,21 +43,59 @@ private void parseFindings(final Report report, final JSONArray resources, final
     }
 
     private Issue convertToIssue(final JSONObject finding, final IssueBuilder issueBuilder) {
-        final String fileName = getSourceFileField(finding, "file", VALUE_NOT_SET);
+        final String rawFileName = getSourceFileField(finding, "file", VALUE_NOT_SET);
+        final String enrichedFileName = getEnrichedFileName(rawFileName);
         final int line = getSourceFileField(finding, "line", 0);
         final int severity = finding.getInt("severity");
         final String title = finding.getString("title");
         final String issueType = finding.getString("issue_type");
+        final String issueTypeId = finding.getString("issue_type_id");
+        final String scope = getSourceFileField(finding, "scope", VALUE_NOT_SET);
+        final String packageName = getPackageName(scope);
         return issueBuilder
-                .setFileName(fileName)
+                .setFileName(enrichedFileName)
                 .setLineStart(line)
                 .setSeverity(mapSeverity(severity))
-                .setMessage(title)
-                .setType(issueType)
-                .setDescription(formatDescription(fileName, finding))
+                .setMessage(issueType)
+                .setPackageName(packageName)
+                .setType(title)
+                .setCategory(issueTypeId)
+                .setDescription(formatDescription(enrichedFileName, finding))
                 .buildAndClean();
     }
 
+    /**
+     * Prepends .java files with src/main/java so that they can be correctly linked to source code files in Jenkins UI.
+     * Otherwise, files are returned as is.
+     * The solution does not cater for all scenarios but should be sufficient for most common use case (Java/Kotlin project
+     * with Maven folder structure).
+     *
+     * @param rawFileName
+     *         file name reported by Veracode
+     *
+     * @return original file name or a java file name prepended with src/main/java
+     */
+    private String getEnrichedFileName(final String rawFileName) {
+        if (endsWith(rawFileName, ".java") && !startsWith(rawFileName, "src/main/java/")) {
+            return "src/main/java/" + rawFileName;
+        }
+        else if (endsWith(rawFileName, ".kt") && !startsWith(rawFileName, "src/main/kotlin/")) {
+            return "src/main/kotlin/" + rawFileName;
+        }
+        else {
+            return rawFileName;
+        }
+    }
+
+    private String getPackageName(final String scope) {
+        if (scope.contains(".")) {
+            return StringUtils.substringBeforeLast(scope, ".");
+        }
+        else {
+            return VALUE_NOT_SET;
+        }
+    }
+
     /**
      * Retrieve source file values in a null safe manner.
      * Veracode has nested json objects representing a source file ( files -> source_file -> values) for which we need
@@ -115,7 +156,7 @@ private String formatDescription(final String fileName, final JSONObject finding
                 div(b("CWE Id: "), text(cweId)),
                 div(b("Flaw Details: "), text(flawLink)),
                 div(b("Severity: "), text(severity)),
-                p(displayHtml)).render();
+                p(rawHtml(displayHtml))).render();
     }
 
 }
diff --git a/src/test/java/edu/hm/hafner/analysis/parser/VeraCodePipelineScannerParserTest.java b/src/test/java/edu/hm/hafner/analysis/parser/VeraCodePipelineScannerParserTest.java
@@ -20,38 +20,72 @@ class VeraCodePipelineScannerParserTest extends StructuredFileParserTest {
 
     @Override
     protected void assertThatIssuesArePresent(final Report report, final SoftAssertions softly) {
-        softly.assertThat(report).hasSize(5);
+        softly.assertThat(report).hasSize(8);
 
         softly.assertThat(report.get(0))
                 .hasSeverity(Severity.WARNING_HIGH)
-                .hasMessage("org.slf4j.Logger.info")
-                .hasFileName("com/sample/LoggingFilter.java")
-                .hasType("Improper Output Neutralization for Logs")
+                .hasType("org.slf4j.Logger.info")
+                .hasCategory("taint")
+                .hasFileName("src/main/java/com/sample/LoggingFilter.java")
+                .hasPackageName("com.sample")
+                .hasMessage("Improper Output Neutralization for Logs")
                 .hasLineStart(28);
         softly.assertThat(report.get(1))
                 .hasSeverity(Severity.WARNING_NORMAL)
-                .hasMessage("set")
+                .hasType("set")
+                .hasCategory("crypto")
                 .hasFileName("react/dist/esm/data.js")
-                .hasType("Use of Hard-coded Password")
+                .hasPackageName("-")
+                .hasMessage("Use of Hard-coded Password")
                 .hasLineStart(25);
         softly.assertThat(report.get(2))
                 .hasSeverity(Severity.WARNING_LOW)
-                .hasMessage("management:endpoint:health:show-details:")
+                .hasType("management:endpoint:health:show-details:")
+                .hasCategory("crypto")
                 .hasFileName("BOOT-INF/classes/application.yml")
-                .hasType("Information Exposure Through Sent Data")
+                .hasPackageName("application")
+                .hasMessage("Information Exposure Through Sent Data")
                 .hasLineStart(1);
         softly.assertThat(report.get(3))
                 .hasSeverity(Severity.WARNING_LOW)
-                .hasMessage("nosourcefile")
+                .hasType("nosourcefile")
+                .hasCategory("other")
                 .hasFileName("-")
-                .hasType("No source_file present")
+                .hasPackageName("-")
+                .hasMessage("No source_file present")
                 .hasLineStart(0);
         softly.assertThat(report.get(4))
                 .hasSeverity(Severity.WARNING_LOW)
-                .hasMessage("nofiles")
+                .hasType("nofiles")
+                .hasCategory("other")
                 .hasFileName("-")
-                .hasType("No files present")
+                .hasPackageName("-")
+                .hasMessage("No files present")
                 .hasLineStart(0);
+        softly.assertThat(report.get(5))
+                .hasSeverity(Severity.WARNING_HIGH)
+                .hasType("child_process.spawn")
+                .hasCategory("taint")
+                .hasFileName("lib/optimizer/Optimizer.js")
+                .hasPackageName("-")
+                .hasMessage("Improper Neutralization of Special Elements used in an OS Command ('OS Command Injection')")
+                .hasLineStart(24);
+        softly.assertThat(report.get(6))
+                .hasSeverity(Severity.WARNING_HIGH)
+                .hasType("org.slf4j.Logger.info")
+                .hasCategory("taint")
+                .hasFileName("src/main/java/com/othersample/LoggingFilter.java")
+                .hasPackageName("com.othersample")
+                .hasMessage("Improper Output Neutralization for Logs")
+                .hasLineStart(55);
+        softly.assertThat(report.get(7))
+                .hasSeverity(Severity.WARNING_HIGH)
+                .hasType("org.slf4j.Logger.info")
+                .hasCategory("taint")
+                .hasFileName("src/main/kotlin/com/othersample/LoggingFilterV2.kt")
+                .hasPackageName("com.othersample")
+                .hasMessage("Improper Output Neutralization for Logs")
+                .hasLineStart(55);
     }
 
     @Test

diff --git a/src/test/resources/edu/hm/hafner/analysis/parser/veracode_pipeline_scanner_result.json b/src/test/resources/edu/hm/hafner/analysis/parser/veracode_pipeline_scanner_result.json
@@ -93,6 +93,68 @@
       "cwe_id": "201",
       "display_text": "No files present",
       "flaw_details_link": "http://localhost/details/5"
+    },
+    {
+      "title": "child_process.spawn",
+      "issue_id": 1012,
+      "gob": "B",
+      "severity": 5,
+      "issue_type_id": "taint",
+      "issue_type": "Improper Neutralization of Special Elements used in an OS Command (\u0027OS Command Injection\u0027)",
+      "cwe_id": "78",
+      "display_text": "\u003cspan\u003eThis call to child_process.spawn() contains a command injection flaw.  The argument to the function is constructed using untrusted input.  If an attacker is allowed to specify all or part of the command, it may be possible to execute commands on the server with the privileges of the executing process.  The level of exposure depends on the effectiveness of input validation routines, if any. \u003c/span\u003e \u003cspan\u003eValidate all untrusted input to ensure that it conforms to the expected format, using centralized data validation routines when possible.  When using blocklists, be sure that the sanitizing routine performs a sufficient number of iterations to remove all instances of disallowed characters.  Most APIs that execute system commands also have a \"safe\" version of the method that takes an array of strings as input rather than a single string, which protects against some forms of command injection.\u003c/span\u003e \u003cspan\u003eReferences: \u003ca href\u003d\"https://cwe.mitre.org/data/definitions/78.html\"\u003eCWE\u003c/a\u003e \u003ca href\u003d\"https://www.owasp.org/index.php/Command_Injection\"\u003eOWASP\u003c/a\u003e \u003ca href\u003d\"https://webappsec.pbworks.com/OS-Commanding\"\u003eWASC\u003c/a\u003e\u003c/span\u003e",
+      "files": {
+        "source_file": {
+          "file": "lib/optimizer/Optimizer.js",
+          "line": 24,
+          "function_name": "!func",
+          "qualified_function_name": "!main.spawnProcess.!func",
+          "function_prototype": "!js_object !func(!js_object, ...)",
+          "scope": "^::!main::spawnProcess"
+        }
+      }
+    },
+    {
+      "title": "org.slf4j.Logger.info",
+      "issue_id": 1000,
+      "gob": "B",
+      "severity": 4,
+      "issue_type_id": "taint",
+      "issue_type": "Improper Output Neutralization for Logs",
+      "cwe_id": "117",
+      "display_text": "\u003cspan\u003eThis call to org.slf4j.Logger.info() could result in a log forging attack.  Writing untrusted data into a log file allows an attacker to forge log entries or inject malicious content into log files.  Corrupted log files can be used to cover an attacker\u0027s tracks or as a delivery mechanism for an attack on a log viewing or processing utility.  For example, if a web administrator uses a browser-based utility to review logs, a cross-site scripting attack might be possible. The second argument to info() contains tainted data from the variables (new Object\\[...\\]). The tainted data originated from an earlier call to AnnotationVirtualController.vc_annotation_entry.\u003c/span\u003e \u003cspan\u003eAvoid directly embedding user input in log files when possible.  Sanitize untrusted data used to construct log entries by using a safe logging mechanism such as the OWASP ESAPI Logger, which will automatically remove unexpected carriage returns and line feeds and can be configured to use HTML entity encoding for non-alphanumeric data.  Alternatively, some of the XSS escaping functions from the OWASP Java Encoder project will also sanitize CRLF sequences.  Only create a custom blocklist when absolutely necessary.  Always validate untrusted input to ensure that it conforms to the expected format, using centralized data validation routines when possible.\u003c/span\u003e \u003cspan\u003eReferences: \u003ca href\u003d\"https://cwe.mitre.org/data/definitions/117.html\"\u003eCWE\u003c/a\u003e \u003ca href\u003d\"https://owasp.org/www-community/attacks/Log_Injection\"\u003eOWASP\u003c/a\u003e \u003ca href\u003d\"https://docs.veracode.com/r/review_cleansers?tocId\u003dnYnZqAenFFZmB75MQrZwuA\"\u003eSupported Cleansers\u003c/a\u003e\u003c/span\u003e",
+      "files": {
+        "source_file": {
+          "file": "src/main/java/com/othersample/LoggingFilter.java",
+          "line": 55,
+          "function_name": "preMatchingFilter",
+          "qualified_function_name": "com.sample.LoggingFilter.preMatchingFilter",
+          "function_prototype": "void preMatchingFilter(javax.ws.rs.container.ContainerRequestContext)",
+          "scope": "com.othersample.LoggingFilter"
+        }
+      },
+      "flaw_details_link": "http://localhost/details/1"
+    },
+    {
+      "title": "org.slf4j.Logger.info",
+      "issue_id": 1000,
+      "gob": "B",
+      "severity": 4,
+      "issue_type_id": "taint",
+      "issue_type": "Improper Output Neutralization for Logs",
+      "cwe_id": "117",
+      "display_text": "\u003cspan\u003eThis call to org.slf4j.Logger.info() could result in a log forging attack.  Writing untrusted data into a log file allows an attacker to forge log entries or inject malicious content into log files.  Corrupted log files can be used to cover an attacker\u0027s tracks or as a delivery mechanism for an attack on a log viewing or processing utility.  For example, if a web administrator uses a browser-based utility to review logs, a cross-site scripting attack might be possible. The second argument to info() contains tainted data from the variables (new Object\\[...\\]). The tainted data originated from an earlier call to AnnotationVirtualController.vc_annotation_entry.\u003c/span\u003e \u003cspan\u003eAvoid directly embedding user input in log files when possible.  Sanitize untrusted data used to construct log entries by using a safe logging mechanism such as the OWASP ESAPI Logger, which will automatically remove unexpected carriage returns and line feeds and can be configured to use HTML entity encoding for non-alphanumeric data.  Alternatively, some of the XSS escaping functions from the OWASP Java Encoder project will also sanitize CRLF sequences.  Only create a custom blocklist when absolutely necessary.  Always validate untrusted input to ensure that it conforms to the expected format, using centralized data validation routines when possible.\u003c/span\u003e \u003cspan\u003eReferences: \u003ca href\u003d\"https://cwe.mitre.org/data/definitions/117.html\"\u003eCWE\u003c/a\u003e \u003ca href\u003d\"https://owasp.org/www-community/attacks/Log_Injection\"\u003eOWASP\u003c/a\u003e \u003ca href\u003d\"https://docs.veracode.com/r/review_cleansers?tocId\u003dnYnZqAenFFZmB75MQrZwuA\"\u003eSupported Cleansers\u003c/a\u003e\u003c/span\u003e",
+      "files": {
+        "source_file": {
+          "file": "com/othersample/LoggingFilterV2.kt",
+          "line": 55,
+          "function_name": "preMatchingFilter",
+          "qualified_function_name": "com.sample.LoggingFilterV2.preMatchingFilter",
+          "function_prototype": "void preMatchingFilter(javax.ws.rs.container.ContainerRequestContext)",
+          "scope": "com.othersample.LoggingFilterV2"
+        }
+      },
+      "flaw_details_link": "http://localhost/details/1"
     }
   ],
   "pipeline_scan": "22.7.0-0",