Skip to content

Commit

Permalink
Merge branch '2.x' into christophe-papazian/APPSEC-11098_limit_memory
Browse files Browse the repository at this point in the history
  • Loading branch information
christophe-papazian committed Sep 27, 2023
2 parents ebe6b22 + 9034c4b commit 257b2cd
Show file tree
Hide file tree
Showing 8 changed files with 294 additions and 12 deletions.
160 changes: 154 additions & 6 deletions ddtrace/appsec/_iast/_ast/visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import ast
import copy
import sys
from typing import Any
from typing import List
from typing import Set
from typing import TYPE_CHECKING

from six import iteritems

Expand All @@ -14,14 +15,10 @@
from ..constants import DEFAULT_WEAK_RANDOMNESS_FUNCTIONS


if TYPE_CHECKING: # pragma: no cover
from typing import Any
from typing import List


PY3 = sys.version_info[0] >= 3
PY30_37 = sys.version_info >= (3, 0, 0) and sys.version_info < (3, 8, 0)
PY38_PLUS = sys.version_info >= (3, 8, 0)
PY39_PLUS = sys.version_info >= (3, 9, 0)

CODE_TYPE_FIRST_PARTY = "first_party"
CODE_TYPE_DD = "datadog"
Expand Down Expand Up @@ -63,6 +60,11 @@ def __init__(
"zfill": "ddtrace_aspects.zfill_aspect",
"ljust": "ddtrace_aspects.ljust_aspect",
},
# Replacement function for indexes and ranges
"slices": {
"index": "ddtrace_aspects.index_aspect",
# "slice": "ddtrace_aspects.slice_aspect",
},
# Replacement functions for modules
"module_functions": {
"BytesIO": "ddtrace_aspects.stringio_aspect",
Expand Down Expand Up @@ -129,6 +131,7 @@ def __init__(
self.filename = filename
self.module_name = module_name

self._aspect_index = self._aspects_spec["slices"]["index"]
self._aspect_functions = self._aspects_spec["functions"]
self._aspect_operators = self._aspects_spec["operators"]
self._aspect_methods = self._aspects_spec["stringalike_methods"]
Expand Down Expand Up @@ -292,6 +295,15 @@ def _attr_node(self, from_node, attr, ctx=ast.Load()): # noqa: B008
name_node = self._name_node(from_node, name_attr, ctx=ctx)
return self._node(ast.Attribute, from_node, attr=attr_attr, ctx=ctx, value=name_node)

def _assign_node(self, from_node, targets, value): # type: (Any, List[Any], Any) -> Any
return self._node(
ast.Assign,
from_node,
targets=targets,
value=value,
type_comment=None,
)

def find_insert_position(self, module_node): # type: (ast.Module) -> int
insert_position = 0
from_future_import_found = False
Expand Down Expand Up @@ -559,3 +571,139 @@ def visit_JoinedStr(self, joinedstr_node): # type: (ast.JoinedStr) -> Any
self.ast_modified = True
_set_metric_iast_instrumented_propagation()
return call_node

def visit_AugAssign(self, augassign_node): # type: (ast.AugAssign) -> Any
"""Replace an inplace add or multiply."""
if isinstance(augassign_node.target, ast.Subscript):
# Can't augassign to function call, ignore this node
augassign_node.target.avoid_convert = True # type: ignore[attr-defined]
self.generic_visit(augassign_node)
return augassign_node

# TODO: Replace an inplace add or multiply (+= / *=)
return augassign_node

def visit_Assign(self, assign_node): # type: (ast.Assign) -> Any
"""
Decompose multiple assignment into single ones and
check if any item in the targets list is if type Subscript and if
that's the case further decompose it to use a temp variable to
avoid assigning to a function call.
"""
# a = b = c
# __dd_tmp = c
# a = __dd_tmp

ret_nodes = []

if len(assign_node.targets) > 1:
# Multiple assignments, assign the value to a temporal variable
tmp_var_left = self._name_node(assign_node, "__dd_tmp", ctx=ast.Store())
assign_value = self._name_node(assign_node, "__dd_tmp", ctx=ast.Load())
assign_to_tmp = self._assign_node(from_node=assign_node, targets=[tmp_var_left], value=assign_node.value)
ret_nodes.append(assign_to_tmp)
self.ast_modified = True
else:
assign_value = assign_node.value # type: ignore

for target in assign_node.targets:
if isinstance(target, ast.Subscript):
# We can't assign to a function call, which is anyway going to rewrite
# the index destination so we just ignore that target
target.avoid_convert = True # type: ignore[attr-defined]
elif isinstance(target, (List, ast.Tuple)):
# Same for lists/tuples on the left side of the assignment
for element in target.elts:
if isinstance(element, ast.Subscript):
element.avoid_convert = True # type: ignore[attr-defined]

# Create a normal assignment. This way we decompose multiple assignments
# like (a = b = c) into a = b and a = c so the transformation above
# is possible.
# Decompose it into a normal, not multiple, assignment
new_assign_value = copy.copy(assign_value)

new_target = copy.copy(target)

single_assign = self._assign_node(assign_node, [new_target], new_assign_value)

self.generic_visit(single_assign)
ret_nodes.append(single_assign)

if len(ret_nodes) == 1:
return ret_nodes[0]

return ret_nodes

def visit_Delete(self, assign_node): # type: (ast.Delete) -> Any
# del replaced_index(foo, bar) would fail so avoid converting the right hand side
# since it's going to be deleted anyway

for target in assign_node.targets:
if isinstance(target, ast.Subscript):
target.avoid_convert = True # type: ignore[attr-defined]

self.generic_visit(assign_node)
return assign_node

def visit_Subscript(self, subscr_node): # type: (ast.Subscript) -> Any
"""
Turn an indexes[1] and slices[0:1:2] into the replacement function call
Optimization: dont convert if the indexes are strings
"""
self.generic_visit(subscr_node)

# We mark nodes to avoid_convert (check visit_Delete, visit_AugAssign, visit_Assign) due to complex
# expressions that raise errors when try to replace with index aspects
if hasattr(subscr_node, "avoid_convert"):
return subscr_node

# Optimization: String literal slices and indexes are not patched
if self._is_string_node(subscr_node.value):
return subscr_node

attr_node = self._attr_node(subscr_node, "")

call_node = self._call_node(
subscr_node,
func=attr_node,
args=[],
)
if isinstance(subscr_node.slice, ast.Slice):
# Slice[0:1:2]. The other cases in this if are Indexes[0]
# TODO: Add stlice
# aspect_split = self._aspect_slice.split(".")
# call_node.func.attr = aspect_split[1]
# call_node.func.value.id = aspect_split[0]
# none_node = self._none_constant(subscr_node)
# lower = subscr_node.slice.lower if subscr_node.slice.lower is not None else none_node
# upper = subscr_node.slice.upper if subscr_node.slice.upper is not None else none_node
# step = subscr_node.slice.step if subscr_node.slice.step is not None else none_node
# call_node.args.extend([subscr_node.value, lower, upper, step])
# self.ast_modified = True
return subscr_node
elif PY39_PLUS:
if self._is_string_node(subscr_node.slice):
return subscr_node
# In Py39+ the if subscr_node.slice member is not a Slice, is directly an unwrapped value
# for the index (e.g. Constant for a number, Name for a var, etc)
aspect_split = self._aspect_index.split(".")
call_node.func.attr = aspect_split[1]
call_node.func.value.id = aspect_split[0]
call_node.args.extend([subscr_node.value, subscr_node.slice])
# TODO: python 3.8 isn't working correctly with index_aspect, tests raise:
# corrupted size vs. prev_size in fastbins
# Test failed with exit code -6
# https://app.circleci.com/pipelines/github/DataDog/dd-trace-py/46665/workflows/3cf1257c-feaf-4653-bb9c-fb840baa1776/jobs/3031799
# elif isinstance(subscr_node.slice, ast.Index):
# if self._is_string_node(subscr_node.slice.value): # type: ignore[attr-defined]
# return subscr_node
# aspect_split = self._aspect_index.split(".")
# call_node.func.attr = aspect_split[1]
# call_node.func.value.id = aspect_split[0]
# call_node.args.extend([subscr_node.value, subscr_node.slice.value]) # type: ignore[attr-defined]
else:
return subscr_node

self.ast_modified = True
return call_node
48 changes: 48 additions & 0 deletions ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include <pybind11/stl.h>

#include "AspectIndex.h"

PyObject*
index_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* idx, TaintRangeMapType* tx_taint_map)
{
size_t len_result_o{ get_pyobject_size(result_o) };
auto idx_long = PyLong_AsLong(idx);
TaintRangeRefs ranges_to_set;
auto ranges = get_ranges(candidate_text);
for (const auto& current_range : ranges) {
if (current_range->start <= idx_long and idx_long < (current_range->start + current_range->length)) {
ranges_to_set.emplace_back(initializer->allocate_taint_range(0l, 1l, current_range->source));
break;
}
}

const auto& res_new_id = new_pyobject_id(result_o, len_result_o);
Py_DECREF(result_o);

if (ranges_to_set.empty()) {
return res_new_id;
}
set_ranges(res_new_id, ranges_to_set);

return res_new_id;
}

PyObject*
api_index_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs)
{
if (nargs != 2) {
return nullptr;
}
PyObject* candidate_text = args[0];
PyObject* idx = args[1];

PyObject* result_o;

result_o = PyObject_GetItem(candidate_text, idx);
auto ctx_map = initializer->get_tainting_map();
if (not ctx_map or ctx_map->empty()) {
return result_o;
}
auto res = index_aspect(result_o, candidate_text, idx, ctx_map);
return res;
}
8 changes: 8 additions & 0 deletions ddtrace/appsec/_iast/_taint_tracking/Aspects/AspectIndex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#pragma once
#include "Aspects/Helpers.h"
#include "TaintedOps/TaintedOps.h"

PyObject*
index_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* idx, TaintRangeMapType* tx_taint_map);
PyObject*
api_index_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs);
2 changes: 1 addition & 1 deletion ddtrace/appsec/_iast/_taint_tracking/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

```bash
sh clean.sh
cmake -DPYTHON_EXECUTABLE:FILEPATH=FILEPATH=/usr/bin/python3.11 . && \
cmake -DPYTHON_EXECUTABLE:FILEPATH=/usr/bin/python3.11 . && \
make -j _native && \
mv lib_native.so _native.so
```
Expand Down
2 changes: 2 additions & 0 deletions ddtrace/appsec/_iast/_taint_tracking/_native.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <pybind11/pybind11.h>

#include "Aspects/AspectExtend.h"
#include "Aspects/AspectIndex.h"
#include "Aspects/AspectJoin.h"
#include "Aspects/AspectOperatorAdd.h"
#include "Aspects/_aspects_helpers.h"
Expand All @@ -29,6 +30,7 @@ static PyMethodDef AspectsMethods[] = {
// python 3.5, 3.6. but METH_FASTCALL could be used instead for python
// >= 3.7
{ "add_aspect", ((PyCFunction)api_add_aspect), METH_FASTCALL, "aspect add" },
{ "index_aspect", ((PyCFunction)api_index_aspect), METH_FASTCALL, "aspect extend" },
{ "join_aspect", ((PyCFunction)api_join_aspect), METH_FASTCALL, "aspect add" },
{ "extend_aspect", ((PyCFunction)api_extend_aspect), METH_FASTCALL, "aspect extend" },
{ nullptr, nullptr, 0, nullptr }
Expand Down
15 changes: 13 additions & 2 deletions ddtrace/appsec/_iast/_taint_tracking/aspects.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import codecs
import traceback
from types import BuiltinFunctionType
from typing import Any
from typing import Callable
from typing import TYPE_CHECKING

from ddtrace.internal.compat import iteritems
Expand All @@ -25,8 +27,6 @@


if TYPE_CHECKING:
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
Expand All @@ -40,6 +40,7 @@
_add_aspect = aspects.add_aspect
_extend_aspect = aspects.extend_aspect
_join_aspect = aspects.join_aspect
_index_aspect = aspects.index_aspect

__all__ = ["add_aspect", "str_aspect", "bytearray_extend_aspect", "decode_aspect", "encode_aspect"]

Expand Down Expand Up @@ -113,6 +114,16 @@ def join_aspect(orig_function, joiner, *args, **kwargs):
return joiner.join(*args, **kwargs)


def index_aspect(candidate_text, index) -> Any:
if not isinstance(candidate_text, TEXT_TYPES) or not isinstance(index, int):
return candidate_text[index]
try:
return _index_aspect(candidate_text, index)
except Exception as e:
_set_iast_error_metric("IAST propagation error. index_aspect. {}".format(e), traceback.format_exc())
return candidate_text[index]


def bytearray_extend_aspect(orig_function, op1, op2):
# type: (Callable, Any, Any) -> Any
if not isinstance(orig_function, BuiltinFunctionType):
Expand Down
66 changes: 66 additions & 0 deletions tests/appsec/iast/aspects/test_index_aspect_fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# -*- encoding: utf-8 -*-
import sys

import pytest

from ddtrace.appsec._iast._utils import _is_python_version_supported as python_supported_by_iast
from tests.appsec.iast.aspects.conftest import _iast_patched_module


if python_supported_by_iast():
from ddtrace.appsec._iast._taint_tracking import OriginType
from ddtrace.appsec._iast._taint_tracking import get_tainted_ranges
from ddtrace.appsec._iast._taint_tracking import taint_pyobject

mod = _iast_patched_module("tests.appsec.iast.fixtures.aspects.str_methods")


@pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST")
def test_string_index_error_index_error():
with pytest.raises(IndexError) as excinfo:
mod.do_index("abc", 22) # pylint: disable=no-member
assert "string index out of range" in str(excinfo.value)


@pytest.mark.skipif(not python_supported_by_iast(), reason="Python version not supported by IAST")
def test_string_index_error_type_error():
with pytest.raises(TypeError) as excinfo:
mod.do_index("abc", "22") # pylint: disable=no-member
assert "string indices must be integers" in str(excinfo.value)


@pytest.mark.skipif(
not python_supported_by_iast() or sys.version_info < (3, 9, 0), reason="Python version not supported by IAST"
)
@pytest.mark.parametrize(
"input_str, index_pos, expected_result, tainted",
[
("abcde", 0, "a", True),
("abcde", 1, "b", True),
("abc", 2, "c", True),
(b"abcde", 0, 97, False),
(b"abcde", 1, 98, False),
(b"abc", 2, 99, False),
(bytearray(b"abcde"), 0, 97, False),
(bytearray(b"abcde"), 1, 98, False),
(bytearray(b"abc"), 2, 99, False),
],
)
def test_string_index(input_str, index_pos, expected_result, tainted):
string_input = taint_pyobject(
pyobject=input_str,
source_name="test_add_aspect_tainting_left_hand",
source_value="foo",
source_origin=OriginType.PARAMETER,
)
assert get_tainted_ranges(string_input)
result = mod.do_index(string_input, index_pos)

assert result == expected_result
tainted_ranges = get_tainted_ranges(result)
if not tainted:
assert len(tainted_ranges) == 0
else:
assert len(tainted_ranges) == 1
assert tainted_ranges[0].start == 0
assert tainted_ranges[0].length == 1
Loading

0 comments on commit 257b2cd

Please sign in to comment.