From 9034c4b7bbd18c7b48816e021509e4b2262f057d Mon Sep 17 00:00:00 2001 From: Alberto Vara Date: Wed, 27 Sep 2023 11:51:00 +0200 Subject: [PATCH] feat(iast): index propagation (#7050) ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Title is accurate. - [x] No unnecessary changes are introduced. - [x] Description motivates each change. - [x] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [x] Testing strategy adequately addresses listed risk(s). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] Release note makes sense to a user of the library. - [x] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [x] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [x] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [x] This PR doesn't touch any of that. --- ddtrace/appsec/_iast/_ast/visitor.py | 160 +++++++++++++++++- .../_taint_tracking/Aspects/AspectIndex.cpp | 48 ++++++ .../_taint_tracking/Aspects/AspectIndex.h | 8 + .../appsec/_iast/_taint_tracking/README.txt | 2 +- .../appsec/_iast/_taint_tracking/_native.cpp | 2 + .../appsec/_iast/_taint_tracking/aspects.py | 15 +- .../aspects/test_index_aspect_fixtures.py | 66 ++++++++ .../iast/fixtures/aspects/str_methods.py | 5 +- 8 files changed, 294 insertions(+), 12 deletions(-) create mode 100644 ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.cpp create mode 100644 ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.h create mode 100644 tests/appsec/iast/aspects/test_index_aspect_fixtures.py diff --git a/ddtrace/appsec/_iast/_ast/visitor.py b/ddtrace/appsec/_iast/_ast/visitor.py index 71b19f7ff0..20fc62ea4c 100644 --- a/ddtrace/appsec/_iast/_ast/visitor.py +++ b/ddtrace/appsec/_iast/_ast/visitor.py @@ -4,8 +4,9 @@ import ast import copy import sys +from typing import Any +from typing import List from typing import Set -from typing import TYPE_CHECKING from six import iteritems @@ -14,14 +15,10 @@ from ..constants import DEFAULT_WEAK_RANDOMNESS_FUNCTIONS -if TYPE_CHECKING: # pragma: no cover - from typing import Any - from typing import List - - PY3 = sys.version_info[0] >= 3 PY30_37 = sys.version_info >= (3, 0, 0) and sys.version_info < (3, 8, 0) PY38_PLUS = sys.version_info >= (3, 8, 0) +PY39_PLUS = sys.version_info >= (3, 9, 0) CODE_TYPE_FIRST_PARTY = "first_party" CODE_TYPE_DD = "datadog" @@ -63,6 +60,11 @@ def __init__( "zfill": "ddtrace_aspects.zfill_aspect", "ljust": "ddtrace_aspects.ljust_aspect", }, + # Replacement function for indexes and ranges + "slices": { + "index": "ddtrace_aspects.index_aspect", + # "slice": "ddtrace_aspects.slice_aspect", + }, # Replacement functions for modules "module_functions": { "BytesIO": "ddtrace_aspects.stringio_aspect", @@ -129,6 +131,7 @@ def __init__( self.filename = filename self.module_name = module_name + self._aspect_index = self._aspects_spec["slices"]["index"] self._aspect_functions = self._aspects_spec["functions"] self._aspect_operators = self._aspects_spec["operators"] self._aspect_methods = self._aspects_spec["stringalike_methods"] @@ -292,6 +295,15 @@ def _attr_node(self, from_node, attr, ctx=ast.Load()): # noqa: B008 name_node = self._name_node(from_node, name_attr, ctx=ctx) return self._node(ast.Attribute, from_node, attr=attr_attr, ctx=ctx, value=name_node) + def _assign_node(self, from_node, targets, value): # type: (Any, List[Any], Any) -> Any + return self._node( + ast.Assign, + from_node, + targets=targets, + value=value, + type_comment=None, + ) + def find_insert_position(self, module_node): # type: (ast.Module) -> int insert_position = 0 from_future_import_found = False @@ -559,3 +571,139 @@ def visit_JoinedStr(self, joinedstr_node): # type: (ast.JoinedStr) -> Any self.ast_modified = True _set_metric_iast_instrumented_propagation() return call_node + + def visit_AugAssign(self, augassign_node): # type: (ast.AugAssign) -> Any + """Replace an inplace add or multiply.""" + if isinstance(augassign_node.target, ast.Subscript): + # Can't augassign to function call, ignore this node + augassign_node.target.avoid_convert = True # type: ignore[attr-defined] + self.generic_visit(augassign_node) + return augassign_node + + # TODO: Replace an inplace add or multiply (+= / *=) + return augassign_node + + def visit_Assign(self, assign_node): # type: (ast.Assign) -> Any + """ + Decompose multiple assignment into single ones and + check if any item in the targets list is if type Subscript and if + that's the case further decompose it to use a temp variable to + avoid assigning to a function call. + """ + # a = b = c + # __dd_tmp = c + # a = __dd_tmp + + ret_nodes = [] + + if len(assign_node.targets) > 1: + # Multiple assignments, assign the value to a temporal variable + tmp_var_left = self._name_node(assign_node, "__dd_tmp", ctx=ast.Store()) + assign_value = self._name_node(assign_node, "__dd_tmp", ctx=ast.Load()) + assign_to_tmp = self._assign_node(from_node=assign_node, targets=[tmp_var_left], value=assign_node.value) + ret_nodes.append(assign_to_tmp) + self.ast_modified = True + else: + assign_value = assign_node.value # type: ignore + + for target in assign_node.targets: + if isinstance(target, ast.Subscript): + # We can't assign to a function call, which is anyway going to rewrite + # the index destination so we just ignore that target + target.avoid_convert = True # type: ignore[attr-defined] + elif isinstance(target, (List, ast.Tuple)): + # Same for lists/tuples on the left side of the assignment + for element in target.elts: + if isinstance(element, ast.Subscript): + element.avoid_convert = True # type: ignore[attr-defined] + + # Create a normal assignment. This way we decompose multiple assignments + # like (a = b = c) into a = b and a = c so the transformation above + # is possible. + # Decompose it into a normal, not multiple, assignment + new_assign_value = copy.copy(assign_value) + + new_target = copy.copy(target) + + single_assign = self._assign_node(assign_node, [new_target], new_assign_value) + + self.generic_visit(single_assign) + ret_nodes.append(single_assign) + + if len(ret_nodes) == 1: + return ret_nodes[0] + + return ret_nodes + + def visit_Delete(self, assign_node): # type: (ast.Delete) -> Any + # del replaced_index(foo, bar) would fail so avoid converting the right hand side + # since it's going to be deleted anyway + + for target in assign_node.targets: + if isinstance(target, ast.Subscript): + target.avoid_convert = True # type: ignore[attr-defined] + + self.generic_visit(assign_node) + return assign_node + + def visit_Subscript(self, subscr_node): # type: (ast.Subscript) -> Any + """ + Turn an indexes[1] and slices[0:1:2] into the replacement function call + Optimization: dont convert if the indexes are strings + """ + self.generic_visit(subscr_node) + + # We mark nodes to avoid_convert (check visit_Delete, visit_AugAssign, visit_Assign) due to complex + # expressions that raise errors when try to replace with index aspects + if hasattr(subscr_node, "avoid_convert"): + return subscr_node + + # Optimization: String literal slices and indexes are not patched + if self._is_string_node(subscr_node.value): + return subscr_node + + attr_node = self._attr_node(subscr_node, "") + + call_node = self._call_node( + subscr_node, + func=attr_node, + args=[], + ) + if isinstance(subscr_node.slice, ast.Slice): + # Slice[0:1:2]. The other cases in this if are Indexes[0] + # TODO: Add stlice + # aspect_split = self._aspect_slice.split(".") + # call_node.func.attr = aspect_split[1] + # call_node.func.value.id = aspect_split[0] + # none_node = self._none_constant(subscr_node) + # lower = subscr_node.slice.lower if subscr_node.slice.lower is not None else none_node + # upper = subscr_node.slice.upper if subscr_node.slice.upper is not None else none_node + # step = subscr_node.slice.step if subscr_node.slice.step is not None else none_node + # call_node.args.extend([subscr_node.value, lower, upper, step]) + # self.ast_modified = True + return subscr_node + elif PY39_PLUS: + if self._is_string_node(subscr_node.slice): + return subscr_node + # In Py39+ the if subscr_node.slice member is not a Slice, is directly an unwrapped value + # for the index (e.g. Constant for a number, Name for a var, etc) + aspect_split = self._aspect_index.split(".") + call_node.func.attr = aspect_split[1] + call_node.func.value.id = aspect_split[0] + call_node.args.extend([subscr_node.value, subscr_node.slice]) + # TODO: python 3.8 isn't working correctly with index_aspect, tests raise: + # corrupted size vs. prev_size in fastbins + # Test failed with exit code -6 + # https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/46665/workflows/3cf1257c-feaf-4653-bb9c-fb840baa1776/jobs/3031799 + # elif isinstance(subscr_node.slice, ast.Index): + # if self._is_string_node(subscr_node.slice.value): # type: ignore[attr-defined] + # return subscr_node + # aspect_split = self._aspect_index.split(".") + # call_node.func.attr = aspect_split[1] + # call_node.func.value.id = aspect_split[0] + # call_node.args.extend([subscr_node.value, subscr_node.slice.value]) # type: ignore[attr-defined] + else: + return subscr_node + + self.ast_modified = True + return call_node diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.cpp b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.cpp new file mode 100644 index 0000000000..87b555b8a4 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.cpp @@ -0,0 +1,48 @@ +#include + +#include "AspectIndex.h" + +PyObject* +index_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* idx, TaintRangeMapType* tx_taint_map) +{ + size_t len_result_o{ get_pyobject_size(result_o) }; + auto idx_long = PyLong_AsLong(idx); + TaintRangeRefs ranges_to_set; + auto ranges = get_ranges(candidate_text); + for (const auto& current_range : ranges) { + if (current_range->start <= idx_long and idx_long < (current_range->start + current_range->length)) { + ranges_to_set.emplace_back(initializer->allocate_taint_range(0l, 1l, current_range->source)); + break; + } + } + + const auto& res_new_id = new_pyobject_id(result_o, len_result_o); + Py_DECREF(result_o); + + if (ranges_to_set.empty()) { + return res_new_id; + } + set_ranges(res_new_id, ranges_to_set); + + return res_new_id; +} + +PyObject* +api_index_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) +{ + if (nargs != 2) { + return nullptr; + } + PyObject* candidate_text = args[0]; + PyObject* idx = args[1]; + + PyObject* result_o; + + result_o = PyObject_GetItem(candidate_text, idx); + auto ctx_map = initializer->get_tainting_map(); + if (not ctx_map or ctx_map->empty()) { + return result_o; + } + auto res = index_aspect(result_o, candidate_text, idx, ctx_map); + return res; +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.h b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.h new file mode 100644 index 0000000000..d22b52b3ec --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.h @@ -0,0 +1,8 @@ +#pragma once +#include "Aspects/Helpers.h" +#include "TaintedOps/TaintedOps.h" + +PyObject* +index_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* idx, TaintRangeMapType* tx_taint_map); +PyObject* +api_index_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs); diff --git a/ddtrace/appsec/_iast/_taint_tracking/README.txt b/ddtrace/appsec/_iast/_taint_tracking/README.txt index 37d8507e43..f7f67f2d41 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/README.txt +++ b/ddtrace/appsec/_iast/_taint_tracking/README.txt @@ -2,7 +2,7 @@ ```bash sh clean.sh -cmake -DPYTHON_EXECUTABLE:FILEPATH=FILEPATH=/usr/bin/python3.11 . && \ +cmake -DPYTHON_EXECUTABLE:FILEPATH=/usr/bin/python3.11 . && \ make -j _native && \ mv lib_native.so _native.so ``` diff --git a/ddtrace/appsec/_iast/_taint_tracking/_native.cpp b/ddtrace/appsec/_iast/_taint_tracking/_native.cpp index 1614706bc9..8b60ac295f 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/_native.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/_native.cpp @@ -9,6 +9,7 @@ #include #include "Aspects/AspectExtend.h" +#include "Aspects/AspectIndex.h" #include "Aspects/AspectJoin.h" #include "Aspects/AspectOperatorAdd.h" #include "Aspects/_aspects_helpers.h" @@ -29,6 +30,7 @@ static PyMethodDef AspectsMethods[] = { // python 3.5, 3.6. but METH_FASTCALL could be used instead for python // >= 3.7 { "add_aspect", ((PyCFunction)api_add_aspect), METH_FASTCALL, "aspect add" }, + { "index_aspect", ((PyCFunction)api_index_aspect), METH_FASTCALL, "aspect extend" }, { "join_aspect", ((PyCFunction)api_join_aspect), METH_FASTCALL, "aspect add" }, { "extend_aspect", ((PyCFunction)api_extend_aspect), METH_FASTCALL, "aspect extend" }, { nullptr, nullptr, 0, nullptr } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects.py b/ddtrace/appsec/_iast/_taint_tracking/aspects.py index de9cac9303..a63be03d93 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects.py +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects.py @@ -4,6 +4,8 @@ import codecs import traceback from types import BuiltinFunctionType +from typing import Any +from typing import Callable from typing import TYPE_CHECKING from ddtrace.internal.compat import iteritems @@ -25,8 +27,6 @@ if TYPE_CHECKING: - from typing import Any - from typing import Callable from typing import Dict from typing import List from typing import Optional @@ -40,6 +40,7 @@ _add_aspect = aspects.add_aspect _extend_aspect = aspects.extend_aspect _join_aspect = aspects.join_aspect +_index_aspect = aspects.index_aspect __all__ = ["add_aspect", "str_aspect", "bytearray_extend_aspect", "decode_aspect", "encode_aspect"] @@ -113,6 +114,16 @@ def join_aspect(orig_function, joiner, *args, **kwargs): return joiner.join(*args, **kwargs) +def index_aspect(candidate_text, index) -> Any: + if not isinstance(candidate_text, TEXT_TYPES) or not isinstance(index, int): + return candidate_text[index] + try: + return _index_aspect(candidate_text, index) + except Exception as e: + _set_iast_error_metric("IAST propagation error. index_aspect. {}".format(e), traceback.format_exc()) + return candidate_text[index] + + def bytearray_extend_aspect(orig_function, op1, op2): # type: (Callable, Any, Any) -> Any if not isinstance(orig_function, BuiltinFunctionType): diff --git a/tests/appsec/iast/aspects/test_index_aspect_fixtures.py b/tests/appsec/iast/aspects/test_index_aspect_fixtures.py new file mode 100644 index 0000000000..1fef9380f7 --- /dev/null +++ b/tests/appsec/iast/aspects/test_index_aspect_fixtures.py @@ -0,0 +1,66 @@ +# -*- encoding: utf-8 -*- +import sys + +import pytest + +from ddtrace.appsec._iast._utils import _is_python_version_supported as python_supported_by_iast +from tests.appsec.iast.aspects.conftest import _iast_patched_module + + +if python_supported_by_iast(): + from ddtrace.appsec._iast._taint_tracking import OriginType + from ddtrace.appsec._iast._taint_tracking import get_tainted_ranges + from ddtrace.appsec._iast._taint_tracking import taint_pyobject + + mod = _iast_patched_module("tests.appsec.iast.fixtures.aspects.str_methods") + + +@pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") +def test_string_index_error_index_error(): + with pytest.raises(IndexError) as excinfo: + mod.do_index("abc", 22) # pylint: disable=no-member + assert "string index out of range" in str(excinfo.value) + + +@pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST") +def test_string_index_error_type_error(): + with pytest.raises(TypeError) as excinfo: + mod.do_index("abc", "22") # pylint: disable=no-member + assert "string indices must be integers" in str(excinfo.value) + + +@pytest.mark.skipif( + not python_supported_by_iast() or sys.version_info < (3, 9, 0), reason="Python version not supported by IAST" +) +@pytest.mark.parametrize( + "input_str, index_pos, expected_result, tainted", + [ + ("abcde", 0, "a", True), + ("abcde", 1, "b", True), + ("abc", 2, "c", True), + (b"abcde", 0, 97, False), + (b"abcde", 1, 98, False), + (b"abc", 2, 99, False), + (bytearray(b"abcde"), 0, 97, False), + (bytearray(b"abcde"), 1, 98, False), + (bytearray(b"abc"), 2, 99, False), + ], +) +def test_string_index(input_str, index_pos, expected_result, tainted): + string_input = taint_pyobject( + pyobject=input_str, + source_name="test_add_aspect_tainting_left_hand", + source_value="foo", + source_origin=OriginType.PARAMETER, + ) + assert get_tainted_ranges(string_input) + result = mod.do_index(string_input, index_pos) + + assert result == expected_result + tainted_ranges = get_tainted_ranges(result) + if not tainted: + assert len(tainted_ranges) == 0 + else: + assert len(tainted_ranges) == 1 + assert tainted_ranges[0].start == 0 + assert tainted_ranges[0].length == 1 diff --git a/tests/appsec/iast/fixtures/aspects/str_methods.py b/tests/appsec/iast/fixtures/aspects/str_methods.py index edc4d42a72..d06ff0a7e9 100644 --- a/tests/appsec/iast/fixtures/aspects/str_methods.py +++ b/tests/appsec/iast/fixtures/aspects/str_methods.py @@ -159,8 +159,7 @@ def do_translate(s, translate_dict): return s.translate(translate_dict) -def do_decode_simple(s): - # type: (bytes, str, str) -> str +def do_decode_simple(s: bytes) -> str: return s.decode() @@ -990,7 +989,7 @@ def do_rstrip_2(s): # type: (str) -> str return s.rstrip() -def do_index(c, i): # type: (str, int) -> str +def do_index(c: str, i: int) -> str: return c[i]