From 11d9048ab525b0e82418d1fd9c176ff39492771b Mon Sep 17 00:00:00 2001 From: Andrew Fasano Date: Tue, 19 Dec 2023 13:13:36 -0500 Subject: [PATCH] PyPANDA interface for targetcmp --- panda/plugins/targetcmp/README.md | 41 ++++- panda/plugins/targetcmp/example.py | 28 ++++ panda/plugins/targetcmp/targetcmp.cpp | 168 ++++++++++++++------ panda/plugins/targetcmp/targetcmp_int_fns.h | 14 ++ panda/plugins/targetcmp/targetcmp_ppp.h | 13 ++ panda/python/core/create_panda_datatypes.py | 4 +- 6 files changed, 216 insertions(+), 52 deletions(-) create mode 100644 panda/plugins/targetcmp/example.py create mode 100644 panda/plugins/targetcmp/targetcmp_int_fns.h create mode 100644 panda/plugins/targetcmp/targetcmp_ppp.h diff --git a/panda/plugins/targetcmp/README.md b/panda/plugins/targetcmp/README.md index c4ea5b036e7..23698af65f6 100644 --- a/panda/plugins/targetcmp/README.md +++ b/panda/plugins/targetcmp/README.md @@ -8,6 +8,8 @@ At every function call, check the first two potential arguments to identify if i The goal here is to dynamically identify values that are compared against a known string. +**WARNING**: this plugin may have a sizable impact on emulation speed. In limited testing, CLI interfaces were still usable, but were noticeably slower than normal. + Arguments --------- @@ -16,10 +18,47 @@ Arguments Dependencies ------------ -`callstack_instr` + +* `callstack_instr` +* `callwitharg` APIs and Callbacks ------------------ +To use `targetcmp` programmatically you can use the following API functions. + +``` +bool add_target(const char* arg) +``` +Add `arg` to the list of strings that `targetcmp` is watching for. Returns true if your string was newly added to the list, or false if it was already in the search list. + + +``` +bool remove_target(const char* arg) +``` +Remove `arg` from the list of strings that `targetcmp` is watching for. Returns true if your string was successfully found and removed from the list.
+ +``` +void reset_targets() +``` +Remove all strings that targetcmp is watching for. + + +`targetcmp` provides a single callback that can be used by other plugins to take actions when a string match is found: + +Name: **on_tcm** +Signature: +```C +typedef void (* on_tcm_t)(CPUState *env, char* specified_value, char* compared_value); +``` + Example ------- +When tab-completing a command, Linux will search the path for a given prefix. If we set our search target to whoami, we'll see when the whoami string is compared against a prefix of `wh`. + +``` +$(python3 -m pandare.qcows x86_64) -panda callstack_instr -panda callwitharg -panda targetcmp:target_strings=whoami,verbose=True + +root@guest# echo wh[PRESS TAB TWICE] +[TargetCMP of whoami] wh +``` diff --git a/panda/plugins/targetcmp/example.py b/panda/plugins/targetcmp/example.py new file mode 100644 index 00000000000..9373b71d440 --- /dev/null +++ b/panda/plugins/targetcmp/example.py @@ -0,0 +1,28 @@ +from pandare import Panda + +panda = Panda(generic="arm") + +panda.load_plugin("callstack_instr") +panda.load_plugin("callwitharg") +panda.load_plugin("targetcmp")#, {"target_strings": "magic"}) +#panda.load_plugin("targetcmp", {"target_strings": "whoami", "verbose": True}) + +@panda.ppp("targetcmp", "on_tcm") +def on_tcm(cpu, known, unknown): + found = panda.ffi.string(known).decode() # The key we set + other = panda.ffi.string(unknown).decode() # What it was compared to + print(f"TCM detected comparison of {found} to {other}") + #panda.arch.dump_regs(cpu) + +@panda.queue_blocking +def driver(): + panda.revert_sync("root") + + t = panda.ffi.new("char[]", b"whoami") + panda.plugins["targetcmp"].add_target(t) + + print(panda.run_serial_cmd("find /usr/bin/ -name 'who*'")) + panda.end_analysis() + +#panda.disable_tb_chaining() +panda.run() diff --git a/panda/plugins/targetcmp/targetcmp.cpp b/panda/plugins/targetcmp/targetcmp.cpp index aaf1100fee9..f62aa8e70ff 100644 --- a/panda/plugins/targetcmp/targetcmp.cpp +++
b/panda/plugins/targetcmp/targetcmp.cpp @@ -12,36 +12,81 @@ #include "callwitharg/callwitharg.h" // Unnecessary? extern "C" { + +#include "targetcmp_ppp.h" #include "callwitharg/callwitharg_ext.h" + + bool add_target(const char* target); + bool remove_target(const char* target); + void reset_targets(void); + bool init_plugin(void *); void uninit_plugin(void *); + PPP_PROT_REG_CB(on_tcm); } -size_t target_str_len; -char *target_str; +PPP_CB_BOILERPLATE(on_tcm) + + std::ofstream outfile; +bool verbose = false; +//size_t target_str_len; + +// To support multiple concurrent targets, we need to track the strings we're tracking +std::vector targets; // We track the last QUEUE_SIZE addresses we've checked to avoid rereading guest pointers -#define QUEUE_SIZE 100 -std::atomic queue_idx(0); -std::atomic queue[QUEUE_SIZE]; -// Now we'll define a function to add to the queue -void add_to_queue(target_ulong addr) { - size_t idx = queue_idx.fetch_add(1); - queue[idx % QUEUE_SIZE] = addr; +//#define QUEUE_SIZE 100 +// We need a queue for each entry in targets +//std::vector> queue_idx; +//std::vector> queue; + +bool add_target(const char* target) { + // Check if we already have this target + for (size_t i = 0; i < targets.size(); i++) { + if (targets[i] == target) { + return false; + } + } + // If not, add it to our list + targets.push_back(target); + // And create a queue for it + //queue_idx.push_back(0); + //queue.push_back(new std::atomic[QUEUE_SIZE]); + + // And request that callwitharg track it + add_target_string((char*)target); + + return true; } -// And a function to check if an address is in the queue -bool in_queue(target_ulong addr) { - for (size_t i = 0; i < QUEUE_SIZE; i++) { - if (queue[i] == addr) return true; + +bool remove_target(const char* target) { + // Check if we have this target + for (size_t i = 0; i < targets.size(); i++) { + if (targets[i] == target) { + // If so, remove it + targets.erase(targets.begin() + i); + + // And delete its queue + //delete[] 
queue[i]; + //queue.erase(queue.begin() + i); + //queue_idx.erase(queue_idx.begin() + i); + + return true; + } } + return false; } -// C++ set for storing unique string matches +void reset_targets(void) { + targets.clear(); +} + +// C++ set for storing unique string matche that we've logged as key=value std::set matches; -void record_match(char *str) { +void record_match(CPUState* cpu, char *known_value, char *str) { if (strlen(str) == 0) return; for (int i = 0; i < strlen(str); i++) { @@ -50,36 +95,51 @@ void record_match(char *str) { } } - std::string s(str); + // If it's a self-comparison, ignore + if (strcmp(known_value, str) == 0 && strlen(known_value) == strlen(str)) { + return; + } + + // We want to create a key=value string to log + std::string s(known_value); + s.append("="); + s.append(str); + if (matches.find(s) == matches.end()) { - //printf("TargetCMP finds %s with length %u\n", s.c_str(), s.length()); - outfile << s << std::endl; + // New match - we want to report this! + + // Verbose: log to stdout + if (verbose) { + printf("[TargetCMP of %s] %s\n", known_value, str); + } + + // Log file: write down + if (outfile.is_open()) { + outfile << s << std::endl; + } + + // PPP output: + PPP_RUN_CB(on_tcm, cpu, known_value, str); + + + // Update matches matches.insert(s); + } } void on_match(CPUState* cpu, target_ulong func_addr, target_ulong *args, char* value, uint matching_idx, uint args_read) { // We expect 2 args, if matching_idx is 0, arg1 is our target pointer, otherwise arg0 assert(args_read >= 2); - - //printf("Match in arg %d with arg1=" TARGET_FMT_lx " and arg2=" TARGET_FMT_lx "\n", matching_idx, args[0], args[1]); - target_ulong target_ptr = args[matching_idx == 0 ? 
1 : 0]; // If we matched arg0, we want arg1 and vice versa - // If it's in the queue, we've already checked it - bail - if (in_queue(target_ptr)) { - return; - } - // Otherwise add it to the queue - add_to_queue(target_ptr); - size_t short_len = strlen(value); size_t full_len = 4*short_len; char* other_arg = (char*)malloc(full_len + 1); // Try to read the target string from memory if (panda_virtual_memory_read(cpu, target_ptr, (uint8_t*)other_arg, full_len) == 0) { - other_arg[target_str_len] = '\0'; // Ensure null termination + other_arg[full_len] = '\0'; // Ensure null termination } else if (panda_virtual_memory_read(cpu, target_ptr, (uint8_t*)other_arg, short_len) == 0) { // Recovered short string - move null terminator early other_arg[short_len] = '\0'; // Ensure null termination @@ -88,7 +148,8 @@ void on_match(CPUState* cpu, target_ulong func_addr, target_ulong *args, char* v free(other_arg); return; } - record_match(other_arg); + + record_match(cpu, value, other_arg); free(other_arg); } @@ -96,6 +157,11 @@ void on_match(CPUState* cpu, target_ulong func_addr, target_ulong *args, char* v std::filesystem::path logfile = std::filesystem::current_path() / "targetcmp.txt"; bool init_plugin(void *self) { +#if !defined(TARGET_ARM) && !defined(TARGET_MIPS) && !defined(TARGET_X86_64) + printf("ERROR: Unsupported architecture for targetcmp\n"); + return false; +#endif + if (!init_callwitharg_api()) { printf("[targetcmp] Fatal error: unable to initialize callwitharg - is it loaded?\n"); return false; @@ -104,38 +170,40 @@ bool init_plugin(void *self) { std::unique_ptr args( panda_get_args("targetcmp"), panda_free_args); - const char* logfile_arg = panda_parse_string_opt(args.get(), "output_file", - NULL, "Output file to record compared values into"); - if (logfile_arg) logfile = std::string(logfile_arg); + // Optional arguments: target_strings, output_file, verbose + char *target_str = strdup(panda_parse_string_opt(args.get(), "target_strings", + "String(s) to match. 
Colon seperated", "")); - target_str = strdup(panda_parse_string_req(args.get(), "target_str", "String to match")); - target_str_len = strlen(target_str); - if (target_str_len <= 0) { - printf("targetcmp error: invalid target_str argument\n"); - return false; + const char* logfile_arg = panda_parse_string_opt(args.get(), "output_file", + NULL, "Output file to record compared values into"); + if (logfile_arg) { + // Open file for writing, delete anything there. + outfile.open(logfile.string(), std::ios_base::out | std::ios_base::trunc); } - // On every function call, use our callback to check an argument is the target_str, if so store the other arg -#if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_X86_64) - // Create empty file - Just so we see that something's happening - // Open file for writing, delete anything there. - outfile.open(logfile.string(), std::ios_base::out | std::ios_base::trunc); + verbose = panda_parse_bool_opt(args.get(), "verbose", "enable verbose output on every match"); - // Call callwitharg's add_target_string function - add_target_string(target_str); + // If we have a target_str, split it on commas and add each target + if (strlen(target_str) > 0) { + char* target = strtok(target_str, ":"); + while (target != NULL) { + add_target(target); + target = strtok(NULL, ":"); + } + } // Register on_call_match with callwitharg's on_call_match_str PPP callback PPP_REG_CB("callwitharg", on_call_match_str, on_match); + + free(target_str); + return true; -#endif - printf("ERROR: Unsupported architecture for targetcmp\n"); - return false; + } void uninit_plugin(void *self) { if (outfile.is_open()) { outfile.close(); } - free((void*)target_str); -} +} \ No newline at end of file diff --git a/panda/plugins/targetcmp/targetcmp_int_fns.h b/panda/plugins/targetcmp/targetcmp_int_fns.h new file mode 100644 index 00000000000..421491b93f9 --- /dev/null +++ b/panda/plugins/targetcmp/targetcmp_int_fns.h @@ -0,0 +1,14 @@ +#ifndef 
__TARGETCMP_INT_FNS_H_ +#define __TARGETCMP_INT_FNS_H_ + +// BEGIN_PYPANDA_NEEDS_THIS -- do not delete this comment bc pypanda +// api autogen needs it. And don't put any compiler directives +// between this and END_PYPANDA_NEEDS_THIS except includes of other +// files in this directory that contain subsections like this one. + +bool add_target(const char* arg); +bool remove_target(const char* arg); +void reset_targets(); + +// END_PYPANDA_NEEDS_THIS -- do not delete this comment! +#endif diff --git a/panda/plugins/targetcmp/targetcmp_ppp.h b/panda/plugins/targetcmp/targetcmp_ppp.h new file mode 100644 index 00000000000..c892ae665bb --- /dev/null +++ b/panda/plugins/targetcmp/targetcmp_ppp.h @@ -0,0 +1,13 @@ +#ifndef __TARGETCMP_PPP_H_ +#define __TARGETCMP_PPP_H_ + + +// BEGIN_PYPANDA_NEEDS_THIS -- do not delete this comment bc pypanda +// api autogen needs it. And don't put any compiler directives +// between this and END_PYPANDA_NEEDS_THIS except includes of other +// files in this directory that contain subsections like this one. + +PPP_CB_TYPEDEF(void, on_tcm, CPUState *env, char* known, char* match); + +// END_PYPANDA_NEEDS_THIS -- do not delete this comment! 
+#endif diff --git a/panda/python/core/create_panda_datatypes.py b/panda/python/core/create_panda_datatypes.py index 98657007c24..2d226628580 100755 --- a/panda/python/core/create_panda_datatypes.py +++ b/panda/python/core/create_panda_datatypes.py @@ -305,6 +305,7 @@ def expand_ppp_def(line): define_clean_header(ffi, include_dir + "/proc_start_linux_ppp.h") define_clean_header(ffi, include_dir + "/forcedexec_ppp.h") define_clean_header(ffi, include_dir + "/stringsearch_ppp.h") + define_clean_header(ffi, include_dir + "/targetcmp_ppp.h") # END PPP headers define_clean_header(ffi, include_dir + "/breakpoints.h") @@ -380,8 +381,9 @@ def main(install=False,recompile=True): # TODO: programtically copy anything that ends with _ppp.h copy_ppp_header("%s/%s" % (PLUGINS_DIR+"/forcedexec", "forcedexec_ppp.h")) copy_ppp_header("%s/%s" % (PLUGINS_DIR+"/stringsearch", "stringsearch_ppp.h")) + copy_ppp_header("%s/%s" % (PLUGINS_DIR+"/targetcmp", "targetcmp_ppp.h")) create_pypanda_header("%s/%s" % (PLUGINS_DIR+"/hooks2", "hooks2.h")) - + copy_ppp_header("%s/%s" % (PLUGINS_DIR+"/proc_start_linux", "proc_start_linux_ppp.h")) create_pypanda_header("%s/%s" % (PLUGINS_DIR+"/proc_start_linux", "proc_start_linux.h")) create_pypanda_header("%s/%s" % (PLUGINS_DIR+"/cosi", "cosi.h"))