diff --git a/t/helper/test-trace2.c b/t/helper/test-trace2.c index d5ca0046c89fd4..1adac29a575254 100644 --- a/t/helper/test-trace2.c +++ b/t/helper/test-trace2.c @@ -412,6 +412,56 @@ static int ut_201counter(int argc, const char **argv) return 0; } +static int ut_300redact_start(int argc, const char **argv) +{ + if (!argc) + die("expect "); + + trace2_cmd_start(argv); + + return 0; +} + +static int ut_301redact_child_start(int argc, const char **argv) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + int k; + + if (!argc) + die("expect "); + + for (k = 0; argv[k]; k++) + strvec_push(&cmd.args, argv[k]); + + trace2_child_start(&cmd); + + strvec_clear(&cmd.args); + + return 0; +} + +static int ut_302redact_exec(int argc, const char **argv) +{ + if (!argc) + die("expect "); + + trace2_exec(argv[0], &argv[1]); + + return 0; +} + +static int ut_303redact_def_param(int argc, const char **argv) +{ + struct key_value_info kvi = KVI_INIT; + + if (argc < 2) + die("expect "); + + trace2_def_param(argv[0], argv[1], &kvi); + + return 0; +} + /* * Usage: * test-tool trace2 @@ -438,6 +488,11 @@ static struct unit_test ut_table[] = { { ut_200counter, "200counter", " [ [ [...]]]" }, { ut_201counter, "201counter", " " }, + + { ut_300redact_start, "300redact_start", "" }, + { ut_301redact_child_start, "301redact_child_start", "" }, + { ut_302redact_exec, "302redact_exec", " " }, + { ut_303redact_def_param, "303redact_def_param", " " }, }; /* clang-format on */ diff --git a/t/t0210-trace2-normal.sh b/t/t0210-trace2-normal.sh index 80e76a4695ed8d..c312657a12cd34 100755 --- a/t/t0210-trace2-normal.sh +++ b/t/t0210-trace2-normal.sh @@ -2,7 +2,7 @@ test_description='test trace2 facility (normal target)' -TEST_PASSES_SANITIZE_LEAK=true +TEST_PASSES_SANITIZE_LEAK=false . ./test-lib.sh # Turn off any inherited trace2 settings for this test. @@ -283,4 +283,22 @@ test_expect_success 'using global config with include' ' test_cmp expect actual ' +test_expect_success 'unsafe URLs are redacted by default' ' + test_when_finished \ + "rm -r trace.normal unredacted.normal clone clone2" && + + test_config_global \ + "url.$(pwd).insteadOf" https://user:pwd@example.com/ && + test_config_global trace2.configParams "core.*,remote.*.url" && + + GIT_TRACE2="$(pwd)/trace.normal" \ + git clone https://user:pwd@example.com/ clone && + ! grep user:pwd trace.normal && + + GIT_TRACE2_REDACT=0 GIT_TRACE2="$(pwd)/unredacted.normal" \ + git clone https://user:pwd@example.com/ clone2 && + grep "start .* clone https://user:pwd@example.com" unredacted.normal && + grep "remote.origin.url=https://user:pwd@example.com" unredacted.normal +' + test_done diff --git a/t/t0211-trace2-perf.sh b/t/t0211-trace2-perf.sh index cfba6861322e37..290b6eaaab1605 100755 --- a/t/t0211-trace2-perf.sh +++ b/t/t0211-trace2-perf.sh @@ -2,7 +2,7 @@ test_description='test trace2 facility (perf target)' -TEST_PASSES_SANITIZE_LEAK=true +TEST_PASSES_SANITIZE_LEAK=false . ./test-lib.sh # Turn off any inherited trace2 settings for this test. @@ -268,4 +268,23 @@ test_expect_success PTHREADS 'global counter test/test2' ' have_counter_event "main" "counter" "test" "test2" 60 actual ' +test_expect_success 'unsafe URLs are redacted by default' ' + test_when_finished \ + "rm -r actual trace.perf unredacted.perf clone clone2" && + + test_config_global \ + "url.$(pwd).insteadOf" https://user:pwd@example.com/ && + test_config_global trace2.configParams "core.*,remote.*.url" && + + GIT_TRACE2_PERF="$(pwd)/trace.perf" \ + git clone https://user:pwd@example.com/ clone && + ! grep user:pwd trace.perf && + + GIT_TRACE2_REDACT=0 GIT_TRACE2_PERF="$(pwd)/unredacted.perf" \ + git clone https://user:pwd@example.com/ clone2 && + perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" actual && + grep "d0|main|start|.* clone https://user:pwd@example.com" actual && + grep "d0|main|def_param|.*|remote.origin.url:https://user:pwd@example.com" actual +' + test_done diff --git a/t/t0212-trace2-event.sh b/t/t0212-trace2-event.sh index 6d3374ff773c1e..147643d582643e 100755 --- a/t/t0212-trace2-event.sh +++ b/t/t0212-trace2-event.sh @@ -323,4 +323,44 @@ test_expect_success 'discard traces when there are too many files' ' head -n2 trace_target_dir/git-trace2-discard | tail -n1 | grep \"event\":\"too_many_files\" ' +# In the following "...redact..." tests, skip testing the GIT_TRACE2_REDACT=0 +# case because we would need to exactly model the full JSON event stream like +# we did in the basic tests above and I do not think it is worth it. + +test_expect_success 'unsafe URLs are redacted by default in cmd_start events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 300redact_start git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in child_start events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 301redact_child_start git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in exec events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 302redact_exec git clone https://user:pwd@example.com/ clone2 && + ! grep user:pwd trace.event +' + +test_expect_success 'unsafe URLs are redacted by default in def_param events' ' + test_when_finished \ + "rm -r trace.event" && + + GIT_TRACE2_EVENT="$(pwd)/trace.event" \ + test-tool trace2 303redact_def_param url https://user:pwd@example.com/ && + ! grep user:pwd trace.event +' + test_done diff --git a/trace2.c b/trace2.c index 6dc74dff4c7320..87d9a3a036129b 100644 --- a/trace2.c +++ b/trace2.c @@ -20,6 +20,7 @@ #include "trace2/tr2_tmr.h" static int trace2_enabled; +static int trace2_redact = 1; static int tr2_next_child_id; /* modify under lock */ static int tr2_next_exec_id; /* modify under lock */ @@ -227,6 +228,8 @@ void trace2_initialize_fl(const char *file, int line) if (!tr2_tgt_want_builtins()) return; trace2_enabled = 1; + if (!git_env_bool("GIT_TRACE2_REDACT", 1)) + trace2_redact = 0; tr2_sid_get(); @@ -247,12 +250,93 @@ int trace2_is_enabled(void) return trace2_enabled; } +/* + * Redacts an argument, i.e. ensures that no password in + * https://user:password@host/-style URLs is logged. + * + * Returns the original if nothing needed to be redacted. + * Returns a pointer that needs to be `free()`d otherwise. + */ +static const char *redact_arg(const char *arg) +{ + const char *p, *colon; + size_t at; + + if (!trace2_redact || + (!skip_prefix(arg, "https://", &p) && + !skip_prefix(arg, "http://", &p))) + return arg; + + at = strcspn(p, "@/"); + if (p[at] != '@') + return arg; + + colon = memchr(p, ':', at); + if (!colon) + return arg; + + return xstrfmt("%.*s:%s", (int)(colon - arg), arg, p + at); +} + +/* + * Redacts arguments in an argument list. + * + * Returns the original if nothing needed to be redacted. + * Otherwise, returns a new array that needs to be released + * via `free_redacted_argv()`. + */ +static const char **redact_argv(const char **argv) +{ + int i, j; + const char *redacted = NULL; + const char **ret; + + if (!trace2_redact) + return argv; + + for (i = 0; argv[i]; i++) + if ((redacted = redact_arg(argv[i])) != argv[i]) + break; + + if (!argv[i]) + return argv; + + for (j = 0; argv[j]; j++) + ; /* keep counting */ + + ALLOC_ARRAY(ret, j + 1); + ret[j] = NULL; + + for (j = 0; j < i; j++) + ret[j] = argv[j]; + ret[i] = redacted; + for (++i; argv[i]; i++) { + redacted = redact_arg(argv[i]); + ret[i] = redacted ? redacted : argv[i]; + } + + return ret; +} + +static void free_redacted_argv(const char **redacted, const char **argv) +{ + int i; + + if (redacted != argv) { + for (i = 0; argv[i]; i++) + if (redacted[i] != argv[i]) + free((void *)redacted[i]); + free((void *)redacted); + } +} + void trace2_cmd_start_fl(const char *file, int line, const char **argv) { struct tr2_tgt *tgt_j; int j; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **redacted; if (!trace2_enabled) return; @@ -260,10 +344,14 @@ void trace2_cmd_start_fl(const char *file, int line, const char **argv) us_now = getnanotime() / 1000; us_elapsed_absolute = tr2tls_absolute_elapsed(us_now); + redacted = redact_argv(argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_start_fl) tgt_j->pfn_start_fl(file, line, us_elapsed_absolute, - argv); + redacted); + + free_redacted_argv(redacted, argv); } void trace2_cmd_exit_fl(const char *file, int line, int code) @@ -409,6 +497,7 @@ void trace2_child_start_fl(const char *file, int line, int j; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **orig_argv = cmd->args.v; if (!trace2_enabled) return; @@ -419,10 +508,24 @@ void trace2_child_start_fl(const char *file, int line, cmd->trace2_child_id = tr2tls_locked_increment(&tr2_next_child_id); cmd->trace2_child_us_start = us_now; + /* + * The `pfn_child_start_fl` API takes a `struct child_process` + * rather than a simple `argv` for the child because some + * targets make use of the additional context bits/values. So + * temporarily replace the original argv (inside the `strvec`) + * with a possibly redacted version. + */ + cmd->args.v = redact_argv(orig_argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_child_start_fl) tgt_j->pfn_child_start_fl(file, line, us_elapsed_absolute, cmd); + + if (cmd->args.v != orig_argv) { + free_redacted_argv(cmd->args.v, orig_argv); + cmd->args.v = orig_argv; + } } void trace2_child_exit_fl(const char *file, int line, struct child_process *cmd, @@ -493,6 +596,7 @@ int trace2_exec_fl(const char *file, int line, const char *exe, int exec_id; uint64_t us_now; uint64_t us_elapsed_absolute; + const char **redacted; if (!trace2_enabled) return -1; @@ -502,10 +606,14 @@ int trace2_exec_fl(const char *file, int line, const char *exe, exec_id = tr2tls_locked_increment(&tr2_next_exec_id); + redacted = redact_argv(argv); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_exec_fl) tgt_j->pfn_exec_fl(file, line, us_elapsed_absolute, - exec_id, exe, argv); + exec_id, exe, redacted); + + free_redacted_argv(redacted, argv); return exec_id; } @@ -637,13 +745,19 @@ void trace2_def_param_fl(const char *file, int line, const char *param, { struct tr2_tgt *tgt_j; int j; + const char *redacted; if (!trace2_enabled) return; + redacted = redact_arg(value); + for_each_wanted_builtin (j, tgt_j) if (tgt_j->pfn_param_fl) - tgt_j->pfn_param_fl(file, line, param, value, kvi); + tgt_j->pfn_param_fl(file, line, param, redacted, kvi); + + if (redacted != value) + free((void *)redacted); } void trace2_def_repo_fl(const char *file, int line, struct repository *repo) diff --git a/trace2.h b/trace2.h index 40d8c2e02a5e50..1f0669bbd2d4f0 100644 --- a/trace2.h +++ b/trace2.h @@ -337,8 +337,8 @@ struct key_value_info; void trace2_def_param_fl(const char *file, int line, const char *param, const char *value, const struct key_value_info *kvi); -#define trace2_def_param(param, value) \ - trace2_def_param_fl(__FILE__, __LINE__, (param), (value)) +#define trace2_def_param(param, value, kvi) \ + trace2_def_param_fl(__FILE__, __LINE__, (param), (value), (kvi)) /* * Tell trace2 about a newly instantiated repo object and assign