Skip to content

Commit

Permalink
Semgrep Nan Injection codemod (#758)
Browse files Browse the repository at this point in the history
* Semgrep Nan Injection codemod

* report change for each line added

* Apply suggestions from code review

Co-authored-by: Dan D'Avella <dan.davella@pixee.ai>

* add unit tests for all 3 funcs

* test more and handle nested cases

* Hardening suggestions for codemodder-python / semgrep-nan-inj (#766)

Use Assignment Expression (Walrus) In Conditional

Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>

* handle more generic cases

* handle binop case

---------

Co-authored-by: Dan D'Avella <dan.davella@pixee.ai>
Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Aug 7, 2024
1 parent 1a6596b commit fb1badd
Show file tree
Hide file tree
Showing 7 changed files with 640 additions and 28 deletions.
23 changes: 13 additions & 10 deletions src/codemodder/codemods/libcst_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from codemodder.codemods.base_transformer import BaseTransformerPipeline
from codemodder.codemods.base_visitor import BaseTransformer
from codemodder.codemods.utils import get_call_name
from codemodder.codetf import Change, ChangeSet
from codemodder.codetf import Change, ChangeSet, Finding
from codemodder.context import CodemodExecutionContext
from codemodder.dependency import Dependency
from codemodder.diff import create_diff_from_tree
Expand Down Expand Up @@ -103,14 +103,8 @@ def add_change(self, node, description: str, start: bool = True):
def add_change_from_position(
self, position: CodeRange, description: str, start: bool = True
):
lineno = position.start.line if start else position.end.line
self.file_context.codemod_changes.append(
Change(
lineNumber=lineno,
description=description,
findings=self.file_context.get_findings_for_location(lineno),
)
)
line_number = position.start.line if start else position.end.line
self.report_change_for_line(line_number, description)

def lineno_for_node(self, node):
return self.node_position(node).start.line
Expand All @@ -120,11 +114,20 @@ def add_dependency(self, dependency: Dependency):

def report_change(self, original_node, description: str | None = None):
line_number = self.lineno_for_node(original_node)
self.report_change_for_line(line_number, description)

def report_change_for_line(
self,
line_number,
description: str | None = None,
findings: list[Finding] | None = None,
):
self.file_context.codemod_changes.append(
Change(
lineNumber=line_number,
description=description or self.change_description,
findings=self.file_context.get_findings_for_location(line_number),
findings=findings
or self.file_context.get_findings_for_location(line_number),
)
)

Expand Down
5 changes: 5 additions & 0 deletions src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,11 @@ class DocMetadata:
guidance_explained="This codemod removes the `@csrf_exempt` decorator from a Django view to ensure it's protected against CSRF attacks. However, there are valid cases for using this decorator so make sure to review your application to determine if this is the case.",
need_sarif="Yes (Semgrep)",
),
"nan-injection": DocMetadata(
importance="Medium",
guidance_explained="We believe that this codemod fixes an unsafe typecast call and that the changes are safe and reliable.",
need_sarif="Yes (Semgrep)",
),
}
ALL_CODEMODS_METADATA = (
CORE_CODEMODS | DEFECTDOJO_CODEMODS | SONAR_CODEMODS | SEMGREP_CODEMODS
Expand Down
2 changes: 2 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from .semgrep.semgrep_enable_jinja2_autoescape import SemgrepEnableJinja2Autoescape
from .semgrep.semgrep_harden_pyyaml import SemgrepHardenPyyaml
from .semgrep.semgrep_jwt_decode_verify import SemgrepJwtDecodeVerify
from .semgrep.semgrep_nan_injection import SemgrepNanInjection
from .semgrep.semgrep_no_csrf_exempt import SemgrepNoCsrfExempt
from .semgrep.semgrep_rsa_key_size import SemgrepRsaKeySize
from .semgrep.semgrep_sql_parameterization import SemgrepSQLParameterization
Expand Down Expand Up @@ -216,5 +217,6 @@
SemgrepHardenPyyaml,
SemgrepRsaKeySize,
SemgrepSQLParameterization,
SemgrepNanInjection,
],
)
10 changes: 1 addition & 9 deletions src/core_codemods/fix_assert_tuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
LibcstTransformerPipeline,
)
from codemodder.codemods.utils_mixin import NameResolutionMixin
from codemodder.codetf import Change
from core_codemods.api import Metadata, ReviewGuidance
from core_codemods.api.core_codemod import CoreCodemod

Expand Down Expand Up @@ -47,14 +46,7 @@ def _report_new_lines(
):
start_line = self.node_position(original_node).start.line
for idx in range(newlines_count):
self.file_context.codemod_changes.append(
Change(
lineNumber=(line_number := start_line + idx),
description=self.change_description,
# For now we can only link the finding to the first line changed
findings=self.file_context.get_findings_for_location(line_number),
)
)
self.report_change_for_line(start_line + idx)


FixAssertTuple = CoreCodemod(
Expand Down
132 changes: 132 additions & 0 deletions src/core_codemods/semgrep/semgrep_nan_injection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
from textwrap import dedent

import libcst as cst
from libcst.codemod import ContextAwareVisitor

from codemodder.codemods.base_codemod import (
Metadata,
ReviewGuidance,
ToolMetadata,
ToolRule,
)
from codemodder.codemods.base_visitor import UtilsMixin
from codemodder.codemods.libcst_transformer import (
LibcstResultTransformer,
LibcstTransformerPipeline,
)
from codemodder.codemods.semgrep import SemgrepSarifFileDetector
from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id


class NanInjectionTransformer(LibcstResultTransformer):
change_description = "Add validation to untrusted numerical input to disallow `nan`"

def leave_SimpleStatementLine(
self,
original_node: cst.SimpleStatementLine,
updated_node: cst.SimpleStatementLine,
):

visitor = MatchNodesInLineVisitor(
self.context, file_context=self.file_context, results=self.results
)
original_node.body[0].visit(visitor)
if visitor.matched_nodes:
# For now only handle one matched Call node in a line
return self.replace_with_if_else(
visitor.matched_nodes[0], original_node, updated_node
)
return original_node

def replace_with_if_else(
self,
node: cst.Call,
original_node: cst.SimpleStatementLine,
updated_node: cst.SimpleStatementLine,
):
if not (target_node := self._get_target_in_call(node)):
return original_node

code = dedent(
f"""\
if {self.code(target_node).strip()}.lower() == "nan":
raise ValueError
else:
{self.code(original_node).strip()}
"""
)
self._report_new_lines(original_node)
new_statement = cst.parse_statement(code)
return new_statement.with_changes(leading_lines=updated_node.leading_lines)

def _get_target_in_call(self, node: cst.Call) -> cst.CSTNode:
match (wrapped_node := node.args[0].value):
case cst.Name():
# float(var)
return wrapped_node

case cst.Call(
func=cst.Name("float") | cst.Name("bool") | cst.Name("complex")
):
# bool(float(var)), complex(float(var)), bool(float(var)), etc
return self._get_target_in_call(wrapped_node)
case cst.Call() | cst.BinaryOperation():
return wrapped_node

def _report_new_lines(self, original_node: cst.SimpleStatementLine):
self.report_change(original_node)
line_number = self.lineno_for_node(original_node)
findings = self.file_context.get_findings_for_location(line_number)
for lineno in range(line_number + 1, line_number + 4):
self.report_change_for_line(lineno, findings=findings)


class MatchNodesInLineVisitor(ContextAwareVisitor, UtilsMixin):
"""Visit Call nodes and match if node location matches results."""

def __init__(
self,
context,
file_context,
results,
) -> None:
self.file_context = file_context
ContextAwareVisitor.__init__(self, context)
UtilsMixin.__init__(
self,
results=results,
line_include=file_context.line_include,
line_exclude=file_context.line_exclude,
)

self.matched_nodes: list[cst.Call] = []

def visit_Call(self, node: cst.Call) -> None:
if self.node_is_selected(node):
self.matched_nodes.append(node)


SemgrepNanInjection = SemgrepCodemod(
metadata=Metadata(
name="nan-injection",
summary=NanInjectionTransformer.change_description.title(),
description=NanInjectionTransformer.change_description.title(),
review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW,
tool=ToolMetadata(
name="Semgrep",
rules=[
ToolRule(
id=(
rule_id := "python.django.security.nan-injection.nan-injection"
),
name="nan-injection",
url=semgrep_url_from_id(rule_id),
)
],
),
references=[],
),
transformer=LibcstTransformerPipeline(NanInjectionTransformer),
detector=SemgrepSarifFileDetector(),
requested_rules=[rule_id],
)
12 changes: 3 additions & 9 deletions src/core_codemods/sql_parameterization.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
infer_expression_type,
)
from codemodder.codemods.utils_mixin import NameAndAncestorResolutionMixin
from codemodder.codetf import Change
from codemodder.utils.clean_code import (
NormalizeFStrings,
RemoveEmptyExpressionsFormatting,
Expand Down Expand Up @@ -249,15 +248,10 @@ def transform_module_impl(self, tree: cst.Module) -> cst.Module:
result = tree.visit(ReplaceNodes(new_changed_nodes))
self.changed_nodes = {}
line_number = self.get_metadata(PositionProvider, call).start.line
self.file_context.codemod_changes.append(
Change(
lineNumber=line_number,
description=SQLQueryParameterizationTransformer.change_description,
findings=self.file_context.get_findings_for_location(
line_number
),
)
self.report_change_for_line(
line_number, SQLQueryParameterizationTransformer.change_description
)

# Normalization and cleanup
result = CleanCode(self.context).transform_module(result)

Expand Down
Loading

0 comments on commit fb1badd

Please sign in to comment.