From 7c296aa665b9a676daa2034c4d5f655a711d2199 Mon Sep 17 00:00:00 2001 From: Jonathan Becker Date: Fri, 10 May 2024 13:00:54 -0500 Subject: [PATCH] perf(decode): improve `score_signature` heuristic (#402) * perf(decode): improve `score_signature` heuristic * fix(tests): make tests pass --- crates/common/src/ether/signatures.rs | 31 +++++++++++++++++++++------ crates/decode/src/core/mod.rs | 14 ++++++++---- crates/decompile/src/core/mod.rs | 12 +++++------ 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/crates/common/src/ether/signatures.rs b/crates/common/src/ether/signatures.rs index 52ec1cf4..4e1239ba 100644 --- a/crates/common/src/ether/signatures.rs +++ b/crates/common/src/ether/signatures.rs @@ -305,7 +305,7 @@ impl ResolveSelector for ResolvedFunction { } } -pub fn score_signature(signature: &str) -> u32 { +pub fn score_signature(signature: &str, num_words: Option<usize>) -> u32 { // the score starts at 1000 let mut score = 1000; @@ -314,7 +314,27 @@ pub fn score_signature(signature: &str) -> u32 { score -= signature.len() as u32; // prioritize signatures with less numbers - score -= (signature.matches(|c: char| c.is_numeric()).count() as u32) * 3; + score -= (signature.split("(").next().unwrap_or("").matches(|c: char| c.is_numeric()).count() + as u32) * + 3; + + // prioritize signatures with parameters + let num_params = signature.matches(',').count() + 1; + score += num_params as u32 * 10; + + // count the number of parameters in the signature, if enabled + if let Some(num_words) = num_words { + let num_dyn_params = signature.matches("bytes").count() + + signature.matches("string").count() + + signature.matches("[").count(); + let num_static_params = num_params - num_dyn_params; + + // reduce the score if the signature has less static parameters than there are words in the + // calldata + if num_static_params < num_words { + score -= (num_words - num_static_params) as u32 * 10; + } + } score } @@ -481,10 +501,7 @@ mod tests { #[test] fn 
score_signature_should_return_correct_score() { let signature = String::from("test_signature"); - let score = score_signature(&signature); - let expected_score = 1000 - - (signature.len() as u32) - - (signature.matches(|c: char| c.is_numeric()).count() as u32) * 3; - assert_eq!(score, expected_score); + let score = score_signature(&signature, None); + assert_eq!(score, 996); } } diff --git a/crates/decode/src/core/mod.rs b/crates/decode/src/core/mod.rs index 5fed3c27..16fe8df5 100644 --- a/crates/decode/src/core/mod.rs +++ b/crates/decode/src/core/mod.rs @@ -134,14 +134,20 @@ pub async fn decode(mut args: DecodeArgs) -> Result { if matches.len() > 1 { debug!("multiple possible matches found. as of 0.8.0, heimdall uses a heuristic to select the best match."); + let num_words = calldata[4..].chunks(32).len(); + matches.sort_by(|a, b| { - let a_score = score_signature(&a.signature); - let b_score = score_signature(&b.signature); + let a_score = score_signature(&a.signature, Some(num_words)); + let b_score = score_signature(&b.signature, Some(num_words)); b_score.cmp(&a_score) }); // debug print for match_ in &matches { - debug!(" > {}: {}", match_.signature, score_signature(&match_.signature)); + debug!( + " > {}: {}", + match_.signature, + score_signature(&match_.signature, Some(num_words)) + ); } } else if matches.is_empty() { warn!("couldn't find any resolved matches for '{}'", function_selector); @@ -153,7 +159,7 @@ pub async fn decode(mut args: DecodeArgs) -> Result { // chunk in blocks of 32 bytes let calldata_words = calldata[4..].chunks(32).map(|x| x.to_owned()).collect::<Vec<_>>(); - // while calldata_words is not empty, iterate over it + // while calldata_words is not empty, iterate over it let mut i = 0; let mut covered_words = HashSet::new(); while covered_words.len() != calldata_words.len() { diff --git a/crates/decompile/src/core/mod.rs b/crates/decompile/src/core/mod.rs index 5fd35d20..2631ab16 100644 --- a/crates/decompile/src/core/mod.rs +++ 
b/crates/decompile/src/core/mod.rs @@ -185,8 +185,8 @@ pub async fn decompile(args: DecompilerArgs) -> Result { // sort by score, take the highest let mut potential_values = v.clone(); potential_values.sort_by(|a: &ResolvedError, b: &ResolvedError| { - let a_score = score_signature(&a.signature); - let b_score = score_signature(&b.signature); + let a_score = score_signature(&a.signature, None); + let b_score = score_signature(&b.signature, None); b_score.cmp(&a_score) }); @@ -217,8 +217,8 @@ pub async fn decompile(args: DecompilerArgs) -> Result { // sort by score, take the highest let mut potential_values = v.clone(); potential_values.sort_by(|a: &ResolvedLog, b: &ResolvedLog| { - let a_score = score_signature(&a.signature); - let b_score = score_signature(&b.signature); + let a_score = score_signature(&a.signature, None); + let b_score = score_signature(&b.signature, None); b_score.cmp(&a_score) }); @@ -246,8 +246,8 @@ pub async fn decompile(args: DecompilerArgs) -> Result { ); matched_resolved_functions.sort_by(|a, b| { - let a_score = score_signature(&a.signature); - let b_score = score_signature(&b.signature); + let a_score = score_signature(&a.signature, None); + let b_score = score_signature(&b.signature, None); b_score.cmp(&a_score) });