From 19550b52640f29681938fd166cbfbf9e923df3ef Mon Sep 17 00:00:00 2001 From: Ilya Zorin Date: Wed, 4 Sep 2024 04:52:30 -0700 Subject: [PATCH] [taint] Follow field accesses based on a config option Summary: While it does make sense to follow field access for taint propagation in some cases in other cases it may result in FPs which are very hard to follow so this diff makes this behaviour configurable instead of unconditionally enable it based on a language. Reviewed By: skcho Differential Revision: D62099585 Privacy Context Container: L1208441 fbshipit-source-id: 1b801ec5962b4f41cce18380b92207e0b67ac6fc --- Makefile | 1 + infer/man/man1/infer-analyze.txt | 5 ++ infer/man/man1/infer-full.txt | 6 ++ infer/man/man1/infer.txt | 6 ++ infer/src/base/Config.ml | 8 +++ infer/src/base/Config.mli | 2 + infer/src/pulse/PulseTaintOperations.ml | 3 +- .../.inferconfig | 9 +++ .../Makefile | 16 ++++++ .../hh/shapes.hack | 10 ++++ .../issues.exp | 1 + .../shapes.hack | 55 +++++++++++++++++++ .../taint_config.json | 23 ++++++++ 13 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/.inferconfig create mode 100644 infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/Makefile create mode 100644 infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/hh/shapes.hack create mode 100644 infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/issues.exp create mode 100644 infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/shapes.hack create mode 100644 infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/taint_config.json diff --git a/Makefile b/Makefile index 5d5fec68abd..97083b89e3c 100644 --- a/Makefile +++ b/Makefile @@ -198,6 +198,7 @@ DIRECT_TESTS += \ BUILD_SYSTEMS_TESTS += \ differential_hack \ pulse_messages_hack \ + pulse_taint_hack_no_follow_field_accesses \ endif diff --git a/infer/man/man1/infer-analyze.txt b/infer/man/man1/infer-analyze.txt index 1224bf9208e..6f08aca8763 100644 --- a/infer/man/man1/infer-analyze.txt +++ b/infer/man/man1/infer-analyze.txt @@ -875,6 +875,11 @@ PULSE CHECKER OPTIONS originate at the source will be reported. If a sink has such a kind, only sensitive data flows to the sink will be reported. + --no-pulse-taint-follow-field-accesses + Deactivates: Specify if taint analysis should follow field + accesses when propagating taints. (Conversely: + --pulse-taint-follow-field-accesses) + --pulse-taint-opaque-files +path Specify files that should be treated as opaque for taint analysis to make sure that procedure's belonging to these files are always diff --git a/infer/man/man1/infer-full.txt b/infer/man/man1/infer-full.txt index 5063400e7a5..e6a1f72d63e 100644 --- a/infer/man/man1/infer-full.txt +++ b/infer/man/man1/infer-full.txt @@ -1644,6 +1644,12 @@ OPTIONS kind, only sensitive data flows to the sink will be reported. See also infer-analyze(1). + --no-pulse-taint-follow-field-accesses + Deactivates: Specify if taint analysis should follow field + accesses when propagating taints. (Conversely: + --pulse-taint-follow-field-accesses) + See also infer-analyze(1). + --pulse-taint-opaque-files +path Specify files that should be treated as opaque for taint analysis to make sure that procedure's belonging to these files are always diff --git a/infer/man/man1/infer.txt b/infer/man/man1/infer.txt index 6b6c6c5ae7d..7a4cd840a54 100644 --- a/infer/man/man1/infer.txt +++ b/infer/man/man1/infer.txt @@ -1644,6 +1644,12 @@ OPTIONS kind, only sensitive data flows to the sink will be reported. See also infer-analyze(1). + --no-pulse-taint-follow-field-accesses + Deactivates: Specify if taint analysis should follow field + accesses when propagating taints. (Conversely: + --pulse-taint-follow-field-accesses) + See also infer-analyze(1). + --pulse-taint-opaque-files +path Specify files that should be treated as opaque for taint analysis to make sure that procedure's belonging to these files are always diff --git a/infer/src/base/Config.ml b/infer/src/base/Config.ml index c5be620cfa1..1a3a9024769 100644 --- a/infer/src/base/Config.ml +++ b/infer/src/base/Config.ml @@ -2721,6 +2721,12 @@ and pulse_taint_data_flow_kinds = such a kind, only sensitive data flows to the sink will be reported." +and pulse_taint_follow_field_accesses = + CLOpt.mk_bool ~long:"pulse-taint-follow-field-accesses" ~default:true + ~in_help:InferCommand.[(Analyze, manual_pulse)] + "Specify if taint analysis should follow field accesses when propagating taints." + + and pulse_taint_opaque_files = CLOpt.mk_path_list ~long:"pulse-taint-opaque-files" ~in_help:InferCommand.[(Analyze, manual_pulse)] @@ -4547,6 +4553,8 @@ and pulse_taint_config = and pulse_taint_opaque_files = RevList.to_list !pulse_taint_opaque_files +and pulse_taint_follow_field_accesses = !pulse_taint_follow_field_accesses + and pulse_taint_short_traces = !pulse_taint_short_traces and pulse_taint_skip_sources = !pulse_taint_skip_sources diff --git a/infer/src/base/Config.mli b/infer/src/base/Config.mli index 65fa783c2d3..cbd720b47d8 100644 --- a/infer/src/base/Config.mli +++ b/infer/src/base/Config.mli @@ -698,6 +698,8 @@ type pulse_taint_config = val pulse_taint_config : pulse_taint_config +val pulse_taint_follow_field_accesses : bool + val pulse_taint_opaque_files : string list val pulse_taint_short_traces : bool diff --git a/infer/src/pulse/PulseTaintOperations.ml b/infer/src/pulse/PulseTaintOperations.ml index dcd8d1a2006..25028c96ef5 100644 --- a/infer/src/pulse/PulseTaintOperations.ml +++ b/infer/src/pulse/PulseTaintOperations.ml @@ -290,7 +290,8 @@ let fold_taint_dependencies addr_hist0 astate ~init ~f = | ArrayAccess _ -> true | FieldAccess _ -> - Language.curr_language_is Hack || Language.curr_language_is Python + Config.pulse_taint_follow_field_accesses + && (Language.curr_language_is Hack || Language.curr_language_is Python) | Dereference -> false in diff --git a/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/.inferconfig b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/.inferconfig new file mode 100644 index 00000000000..a34e10dde61 --- /dev/null +++ b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/.inferconfig @@ -0,0 +1,9 @@ +{ + "scheduler": "callgraph", + "pulse-specialization-partial": true, + "pulse-taint-short-traces": true, + "pulse-taint-follow-field-accesses": false, + "pulse-taint-config": [ + "taint_config.json" + ] +} diff --git a/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/Makefile b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/Makefile new file mode 100644 index 00000000000..6908e5f9381 --- /dev/null +++ b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/Makefile @@ -0,0 +1,16 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +TESTS_DIR = ../.. + +INFER_OPTIONS = \ + --pulse-only --debug-exceptions --no-pulse-force-continue + +INFERPRINT_OPTIONS = --issues-tests + +HH_SOURCES = $(sort $(wildcard hh/*.hack)) +SOURCES = $(sort $(wildcard *.hack)) + +include $(TESTS_DIR)/hack.make diff --git a/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/hh/shapes.hack b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/hh/shapes.hack new file mode 100644 index 00000000000..1b2f72745a5 --- /dev/null +++ b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/hh/shapes.hack @@ -0,0 +1,10 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +namespace Shapes; + +function unknown(mixed $sc): string { + return "42"; +} diff --git a/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/issues.exp b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/issues.exp new file mode 100644 index 00000000000..2a8d4ebac7a --- /dev/null +++ b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/issues.exp @@ -0,0 +1 @@ +shapes.hack, Shapes::C1.passViaShapeGetBad, 2, TAINT_ERROR, no_bucket, ERROR, [source of the taint here: value passed as argument `#0` to `Shapes::C1.passViaShapeGetBad` with kind `Shapes`,flows to this sink: value passed as argument `#0` to `Shapes::ShapeLogger$static.taintSink` with kind `Shapes`], source: Shapes::C1.passViaShapeGetBad, sink: Shapes::ShapeLogger$static.taintSink, tainted expression: $s->debug_data diff --git a/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/shapes.hack b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/shapes.hack new file mode 100644 index 00000000000..8186182192b --- /dev/null +++ b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/shapes.hack @@ -0,0 +1,55 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +namespace Shapes; + +class SensitiveClass { + public string $sensitiveField = "42"; + + public function getId(): string { + return $this->sensitiveField; + } +} + +class ShapeLogger { + const type TSchemaShape = shape( + 'msg' => string, + ?'debug_data' => ?string, + ); + + public static function logData(this::TSchemaShape $data): void { + self::taintSink($data); + } + + public static function taintSink(mixed $data): void {} +} + +class C1 { + public function passViaShapeOk(SensitiveClass $sc): void { + ShapeLogger::logData( + shape('msg' => 'Oh-oh', 'debug_data' => $sc->sensitiveField), + ); + } + + public function passViaShapeGetBad(SensitiveClass $sc): void { + $s = shape('msg' => 'Oh-oh', 'debug_data' => $sc->sensitiveField); + ShapeLogger::taintSink($s['debug_data']); + } + + public function passUnrelatedViaShapeGetOk(SensitiveClass $sc): void { + $s = shape('msg' => 'Oh-oh', 'debug_data' => $sc->sensitiveField); + ShapeLogger::taintSink($s['msg']); + } + + public function passViaUnknownOk(SensitiveClass $sc): void { + $data = unknown($sc); + ShapeLogger::logData(shape('msg' => 'Oh-oh', 'debug_data' => $data)); + } + + public function passViaShapeAndUnknownOk(SensitiveClass $sc): void { + $data = unknown(shape("sc" => $sc)); + ShapeLogger::logData(shape('msg' => 'Oh-oh', 'debug_data' => $data)); + } +} diff --git a/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/taint_config.json b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/taint_config.json new file mode 100644 index 00000000000..e8c5a1d945e --- /dev/null +++ b/infer/tests/build_systems/pulse_taint_hack_no_follow_field_accesses/taint_config.json @@ -0,0 +1,23 @@ +{ + "pulse-taint-sources": [ + { + "procedure_regex": ".*", + "taint_target": [ + "ArgumentsMatchingTypes", + [ + "SensitiveClass" + ] + ], + "kinds": ["Shapes"] + } + ], + "pulse-taint-sinks": [ + { "class_names": ["Shapes::ShapeLogger"], "method_names": ["taintSink"], "kinds": ["Shapes"] } + ], + "pulse-taint-policies": [ + { + "short_description": "Taint flow", + "taint_flows": [{"source_kinds": ["Shapes"], "sink_kinds": ["Shapes"]}] + } + ] +}