From 4d2b184b1b908adcdce3d7f33286db3f2270a16c Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sat, 22 Jun 2024 11:09:13 +0530 Subject: [PATCH 01/43] Add llm calling and mermaid comment generating functions --- vibi-dpu/Cargo.toml | 2 +- vibi-dpu/src/core/relevance.rs | 39 ++++++++++++++++-- vibi-dpu/src/llm/mod.rs | 1 + vibi-dpu/src/llm/utils.rs | 73 ++++++++++++++++++++++++++++++++++ vibi-dpu/src/main.rs | 1 + 5 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 vibi-dpu/src/llm/mod.rs create mode 100644 vibi-dpu/src/llm/utils.rs diff --git a/vibi-dpu/Cargo.toml b/vibi-dpu/Cargo.toml index 12ff5bc2..76634418 100644 --- a/vibi-dpu/Cargo.toml +++ b/vibi-dpu/Cargo.toml @@ -21,7 +21,7 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" base64ct = "1.5.3" sha256 = "1.1.1" -reqwest = { version = "0.11", features = ["json", "blocking"] } +reqwest = { version = "0.11", features = ["json", "blocking", "stream"] } google-cloud-pubsub = "0.15.0" google-cloud-default = { version = "0.3.0", features = ["pubsub"] } google-cloud-googleapis = "0.9.0" diff --git a/vibi-dpu/src/core/relevance.rs b/vibi-dpu/src/core/relevance.rs index d939e0e7..c0bb2168 100644 --- a/vibi-dpu/src/core/relevance.rs +++ b/vibi-dpu/src/core/relevance.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; -use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, utils::{aliases::get_login_handles, relevance::Relevance, hunk::{HunkMap, PrHunkItem}, user::ProviderEnum}}; +use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, llm::utils::{call_llm_api, get_changed_files, read_files}, utils::{aliases::get_login_handles, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; use crate::utils::review::Review; use crate::utils::repo_config::RepoConfig; @@ -22,7 +22,7 @@ pub async fn process_relevance(hunkmap: &HunkMap, review: &Review, let relevance_vec = relevance_vec_opt.expect("Empty coverage_obj_opt"); if repo_config.comment() { // create comment text - let comment = comment_text(&relevance_vec, repo_config.auto_assign()); + let comment = comment_text(&relevance_vec, repo_config.auto_assign()).await; // add comment if review.provider().to_string() == ProviderEnum::Bitbucket.to_string() { // TODO - add feature flag check @@ -184,7 +184,7 @@ async fn calculate_relevance(prhunk: &PrHunkItem, review: &mut Review) -> Option return Some(relevance_vec); } -fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> String { +async fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> String { let mut comment = "Relevant users for this PR:\n\n".to_string(); // Added two newlines comment += "| Contributor Name/Alias | Relevance |\n"; // Added a newline at the end comment += "| -------------- | --------------- |\n"; // Added a newline at the end @@ -217,9 +217,42 @@ fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> String { comment += "Relevance of the reviewer is calculated based on the git blame information of the PR. To know more, hit us up at contact@vibinex.com.\n\n"; // Added two newlines comment += "To change comment and auto-assign settings, go to [your Vibinex settings page.](https://vibinex.com/u)\n"; // Added a newline at the end + if let Some(mermaid_text) = mermaid_comment().await { + comment += mermaid_text.as_str(); + } + return comment; } +pub async fn mermaid_comment() -> Option { + match get_changed_files().and_then(read_files) { + Some(file_contents) => { + let prompt = format!( + "Files changed:\n{}\nQuestion: Generate a mermaid diagram to represent the changes.", + file_contents + ); + + match call_llm_api(prompt).await { + Some(mermaid_response) => { + let mermaid_comment = format!( + "### Call Stack Diff\n```mermaid\n{}\n```", + mermaid_response + ); + return Some(mermaid_comment); + } + None => { + log::error!("[mermaid_comment] Failed to call LLM API"); + return None; + } + } + } + None => { + log::error!("[mermaid_comment] Failed to read changed files:"); + return None; + } + } +} + pub fn deduplicated_relevance_vec_for_comment(relevance_vec: &Vec) -> (HashMap, f32>, Vec) { let mut combined_relevance_map: HashMap, f32> = HashMap::new(); let mut unmapped_aliases = Vec::new(); diff --git a/vibi-dpu/src/llm/mod.rs b/vibi-dpu/src/llm/mod.rs new file mode 100644 index 00000000..fab870e3 --- /dev/null +++ b/vibi-dpu/src/llm/mod.rs @@ -0,0 +1 @@ +pub mod utils; \ No newline at end of file diff --git a/vibi-dpu/src/llm/utils.rs b/vibi-dpu/src/llm/utils.rs new file mode 100644 index 00000000..6df78ede --- /dev/null +++ b/vibi-dpu/src/llm/utils.rs @@ -0,0 +1,73 @@ +use std::path::Path; + +use futures_util::StreamExt; +use std::fs; + +use crate::utils::reqwest_client::get_client; + +pub async fn call_llm_api(prompt: String) -> Option { + let client = get_client(); + let url = "https://your-llm-api-endpoint.com"; + let token = "your_api_token"; + + let response_res = client.post(url) + .bearer_auth(token) + .json(&serde_json::json!({"prompt": prompt})) + .send() + .await; + if response_res.is_err() { + let err = response_res.expect_err("No error in response_res"); + log::error!("[call_llm_api] Error in calling api: {:?}", err); + return None; + } + let response = response_res.expect("Uncaught error in response_res"); + let mut final_response = String::new(); + + let mut stream = response.bytes_stream(); + while let Some(item_res) = stream.next().await { + if item_res.is_err() { + let err = item_res.expect_err("Empty error in item_res"); + log::error!("[call_llm_api] Error in parsing stream {:?}", err); + return None; + } + let item = item_res.expect("Empty item_res"); + let chunk = item; + + final_response.push_str(&String::from_utf8_lossy(&chunk)); + } + + Some(final_response) +} + +pub fn get_changed_files() -> Option> { + // Replace this with actual logic to get changed files in the PR + let output_res = std::process::Command::new("git") + .args(&["diff", "--name-only", "HEAD^", "HEAD"]) + .output(); + if output_res.is_err() { + let err = output_res.expect_err("Empty error in output_res"); + log::error!("[get_changed_files] Error in getting diff files: {:?}", err); + return None; + } + let output = output_res.expect("Uncaught error in output_res"); + let files = String::from_utf8_lossy(&output.stdout); + Some(files.lines().map(String::from).collect()) +} + +pub fn read_files(files: Vec) -> Option { + let mut content = String::new(); + + for file in files { + let path = Path::new(&file); + let content_res = fs::read_to_string(path); + if path.exists() { + if content_res.is_err() { + return None; + } + content = content_res.expect("Empty content_res"); + content.push('\n'); + } + } + + Some(content) +} \ No newline at end of file diff --git a/vibi-dpu/src/main.rs b/vibi-dpu/src/main.rs index 14b88d60..66b5f06d 100644 --- a/vibi-dpu/src/main.rs +++ b/vibi-dpu/src/main.rs @@ -7,6 +7,7 @@ mod github; mod utils; mod logger; mod health; +mod llm; use github::auth::app_access_token; use health::status::send_status_start; use tokio::task; From 68b967bb483c54ac5e97f6257c77af16af207f5b Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sun, 23 Jun 2024 00:42:31 +0530 Subject: [PATCH 02/43] remove token and convert return type to option --- vibi-dpu/src/llm/utils.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vibi-dpu/src/llm/utils.rs b/vibi-dpu/src/llm/utils.rs index 6df78ede..80cca9b2 100644 --- a/vibi-dpu/src/llm/utils.rs +++ b/vibi-dpu/src/llm/utils.rs @@ -8,11 +8,9 @@ use crate::utils::reqwest_client::get_client; pub async fn call_llm_api(prompt: String) -> Option { let client = get_client(); let url = "https://your-llm-api-endpoint.com"; - let token = "your_api_token"; let response_res = client.post(url) - .bearer_auth(token) - .json(&serde_json::json!({"prompt": prompt})) + .json(&serde_json::json!({"model": "phind-codellama", "prompt": prompt})) .send() .await; if response_res.is_err() { From 368d610d27031f4bf7e96aa082e782a109eb0432 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sun, 30 Jun 2024 19:59:05 +0530 Subject: [PATCH 03/43] Call llm generate api with proper inputs --- vibi-dpu/src/core/relevance.rs | 127 ++++++++++++++++++++++++++------- vibi-dpu/src/core/review.rs | 14 ++-- vibi-dpu/src/llm/gitops.rs | 91 +++++++++++++++++++++++ vibi-dpu/src/llm/mod.rs | 3 +- vibi-dpu/src/llm/utils.rs | 105 +++++++++++++++------------ vibi-dpu/src/utils/gitops.rs | 2 +- 6 files changed, 262 insertions(+), 80 deletions(-) create mode 100644 vibi-dpu/src/llm/gitops.rs diff --git a/vibi-dpu/src/core/relevance.rs b/vibi-dpu/src/core/relevance.rs index c0bb2168..a142792b 100644 --- a/vibi-dpu/src/core/relevance.rs +++ b/vibi-dpu/src/core/relevance.rs @@ -1,10 +1,10 @@ use std::collections::{HashMap, HashSet}; -use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, llm::utils::{call_llm_api, get_changed_files, read_files}, utils::{aliases::get_login_handles, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; +use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, llm::{gitops::get_changed_files, utils::{call_llm_api, parse_llm_response, read_file}}, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; use crate::utils::review::Review; use crate::utils::repo_config::RepoConfig; -pub async fn process_relevance(hunkmap: &HunkMap, review: &Review, +pub async fn process_relevance(hunkmap: &HunkMap, excluded_files: &Vec, small_files: &Vec, review: &Review, repo_config: &mut RepoConfig, access_token: &str, old_review_opt: &Option, ) { log::info!("Processing relevance of code authors..."); @@ -22,7 +22,8 @@ pub async fn process_relevance(hunkmap: &HunkMap, review: &Review, let relevance_vec = relevance_vec_opt.expect("Empty coverage_obj_opt"); if repo_config.comment() { // create comment text - let comment = comment_text(&relevance_vec, repo_config.auto_assign()).await; + let comment = comment_text(&relevance_vec, repo_config.auto_assign(), + excluded_files, small_files, review).await; // add comment if review.provider().to_string() == ProviderEnum::Bitbucket.to_string() { // TODO - add feature flag check @@ -184,7 +185,8 @@ async fn calculate_relevance(prhunk: &PrHunkItem, review: &mut Review) -> Option return Some(relevance_vec); } -async fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> String { +async fn comment_text(relevance_vec: &Vec, auto_assign: bool, + excluded_files: &Vec, small_files: &Vec, review: &Review) -> String { let mut comment = "Relevant users for this PR:\n\n".to_string(); // Added two newlines comment += "| Contributor Name/Alias | Relevance |\n"; // Added a newline at the end comment += "| -------------- | --------------- |\n"; // Added a newline at the end @@ -208,6 +210,14 @@ async fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> Stri comment += &format!("Missing profile handles for {} aliases. [Go to your Vibinex settings page](https://vibinex.com/settings) to map aliases to profile handles.", unmapped_aliases.len()); } + if !excluded_files.is_empty() { + comment += "\n\n"; + comment += "Ignoring following files due to large size: "; + for file_item in excluded_files { + comment += &format!("- {}\n", file_item.filepath.as_str()); + } + } + if auto_assign { comment += "\n\n"; comment += "Auto assigning to relevant reviewers."; @@ -217,40 +227,103 @@ async fn comment_text(relevance_vec: &Vec, auto_assign: bool) -> Stri comment += "Relevance of the reviewer is calculated based on the git blame information of the PR. To know more, hit us up at contact@vibinex.com.\n\n"; // Added two newlines comment += "To change comment and auto-assign settings, go to [your Vibinex settings page.](https://vibinex.com/u)\n"; // Added a newline at the end - if let Some(mermaid_text) = mermaid_comment().await { + if let Some(mermaid_text) = mermaid_comment(small_files, review).await { comment += mermaid_text.as_str(); } return comment; } -pub async fn mermaid_comment() -> Option { - match get_changed_files().and_then(read_files) { - Some(file_contents) => { - let prompt = format!( - "Files changed:\n{}\nQuestion: Generate a mermaid diagram to represent the changes.", - file_contents - ); - - match call_llm_api(prompt).await { - Some(mermaid_response) => { - let mermaid_comment = format!( - "### Call Stack Diff\n```mermaid\n{}\n```", - mermaid_response - ); - return Some(mermaid_comment); - } - None => { - log::error!("[mermaid_comment] Failed to call LLM API"); - return None; - } - } - } +pub async fn mermaid_comment(small_files: &Vec, review: &Review) -> Option { + let (file_lines_del_map, file_lines_add_map) = get_changed_files(small_files, review); + let files: Vec = small_files.iter().map(|item| item.filepath.clone()).collect(); + let system_prompt_opt = read_file("/app/prompt"); + if system_prompt_opt.is_none() { + log::error!("[mermaid_comment] Unable to read system prompt"); + return None; + } + let system_prompt = system_prompt_opt.expect("Empty system_prompt_opt"); + for file in files { + let file_path = format!("{}/{}", review.clone_dir(), &file); + if !file.ends_with(".rs") { + log::debug!("[mermaid_comment] File extension not valid: {}", &file); + continue; + } + match read_file(&file_path) { None => { log::error!("[mermaid_comment] Failed to read changed files:"); return None; } + Some(file_contents) => { + let numbered_content = file_contents + .lines() + .enumerate() + .map(|(index, line)| format!("{} {}", index + 1, line)) + .collect::>() + .join("\n"); + let call_stack_del_opt = process_call_stack_changes(&system_prompt, + &numbered_content, + &file_lines_del_map, + &file + ).await; + // let call_stack_add_opt = process_call_stack_changes(&system_prompt, + // &numbered_content, + // &file_lines_add_map, + // &file + // ).await; + // if call_stack_add_opt.is_none() || call_stack_del_opt.is_none() { + // log::error!("[mermaid_comment] Unable to generate call stacks for added and deleted lines"); + // return None; + // } + let call_stack_del = call_stack_del_opt.expect("Empty call_stack_del_opt"); + // let call_stack_add = call_stack_add_opt.expect("Empty call_stack_add"); + let mermaid_comment = format!( + "### Call Stack Diff\nDeletions - \n```mermaid\n{}\n```", + call_stack_del, + // call_stack_add + ); + return Some(mermaid_comment); + } } + } + return None; +} + +async fn process_call_stack_changes(system_prompt: &str, + numbered_content: &str, + file_lines_map: &HashMap>, + file: &str +) -> Option { + let lines_vec = &file_lines_map[file]; + log::debug!("[process_call_stack_changes] lines_vec = {:?}", &lines_vec); + for (line_start, line_end) in lines_vec { + log::debug!("[process_call_stack_changes] line start, end = {}, {}", &line_start, &line_end); + let prompt = format!( + "{}\n\n### User Message\nInput -\n{}\n{}\nLine Start - {}\nLine End - {}\n\nOutput -", + system_prompt, + &file, + numbered_content, + line_start, + line_end + ); + match call_llm_api(prompt).await { + None => { + log::error!("[mermaid_comment] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + // let mermaid_response_opt = parse_llm_response(&llm_response); + // if mermaid_response_opt.is_none() { + // log::error!("[process_call_stack_changes] Unable to parse llm response"); + // return None; + // } + // let mermaid_response = mermaid_response_opt.expect("Empty mermaid_response_opt"); + // return Some(mermaid_response); + return None; + } + } + } + return None; } pub fn deduplicated_relevance_vec_for_comment(relevance_vec: &Vec) -> (HashMap, f32>, Vec) { diff --git a/vibi-dpu/src/core/review.rs b/vibi-dpu/src/core/review.rs index dad82e2d..fcff276f 100644 --- a/vibi-dpu/src/core/review.rs +++ b/vibi-dpu/src/core/review.rs @@ -11,7 +11,7 @@ use crate::{ review::{get_review_from_db, save_review_to_db}, }, utils::{ - gitops::{commit_exists, generate_blame, generate_diff, get_excluded_files, git_pull, process_diffmap}, + gitops::{commit_exists, generate_blame, generate_diff, get_excluded_files, git_pull, process_diffmap, StatItem}, hunk::{HunkMap, PrHunkItem}, repo_config::RepoConfig, reqwest_client::get_client, @@ -45,20 +45,20 @@ pub async fn process_review(message_data: &Vec) { send_hunkmap(&hunkmap_opt, &review, &repo_config, &access_token, &old_review_opt).await; } -pub async fn send_hunkmap(hunkmap_opt: &Option, review: &Review, +pub async fn send_hunkmap(hunkmap_opt: &Option<(HunkMap, Vec, Vec)>, review: &Review, repo_config: &RepoConfig, access_token: &str, old_review_opt: &Option) { if hunkmap_opt.is_none() { log::error!("[send_hunkmap] Empty hunkmap in send_hunkmap"); return; } - let hunkmap = hunkmap_opt.to_owned().expect("empty hunkmap_opt"); + let (hunkmap, excluded_files, small_files) = hunkmap_opt.as_ref().expect("empty hunkmap_opt"); log::debug!("HunkMap = {:?}", &hunkmap); store_hunkmap_to_db(&hunkmap, review); publish_hunkmap(&hunkmap); let hunkmap_async = hunkmap.clone(); let review_async = review.clone(); let mut repo_config_clone = repo_config.clone(); - process_relevance(&hunkmap_async, &review_async, + process_relevance(&hunkmap_async, excluded_files, small_files, &review_async, &mut repo_config_clone, access_token, old_review_opt).await; } @@ -73,7 +73,7 @@ fn hunk_already_exists(review: &Review) -> bool { log::debug!("[hunk_already_exists] Hunk already in db!"); return true; } -pub async fn process_review_changes(review: &Review) -> Option{ +pub async fn process_review_changes(review: &Review) -> Option<(HunkMap, Vec, Vec)>{ log::info!("Processing changes in code..."); let mut prvec = Vec::::new(); let fileopt = get_excluded_files(&review); @@ -82,7 +82,7 @@ pub async fn process_review_changes(review: &Review) -> Option{ log::error!("[process_review_changes] No files to review for PR {}", review.id()); return None; } - let (_, smallfiles) = fileopt.expect("fileopt is empty"); + let (excluded_files, smallfiles) = fileopt.expect("fileopt is empty"); let diffmap = generate_diff(&review, &smallfiles); log::debug!("[process_review_changes] diffmap = {:?}", &diffmap); let linemap = process_diffmap(&diffmap); @@ -102,7 +102,7 @@ pub async fn process_review_changes(review: &Review) -> Option{ format!("{}/hunkmap", review.db_key()), ); log::debug!("[process_review_changes] hunkmap: {:?}", hunkmap); - return Some(hunkmap); + return Some((hunkmap, excluded_files, smallfiles)); } pub async fn commit_check(review: &Review, access_token: &str) { diff --git a/vibi-dpu/src/llm/gitops.rs b/vibi-dpu/src/llm/gitops.rs new file mode 100644 index 00000000..71f90cbe --- /dev/null +++ b/vibi-dpu/src/llm/gitops.rs @@ -0,0 +1,91 @@ +use std::{collections::HashMap, process::Command, str}; + +use crate::utils::{gitops::StatItem, review::Review}; + +pub fn get_changed_files(small_files: &Vec, review: &Review) -> (HashMap>, HashMap>) { + // Replace this with actual logic to get changed files in the PR + let mut add_hunks_map = HashMap::>::new(); + let mut del_hunks_map = HashMap::>::new(); + let prev_commit = review.base_head_commit(); + let curr_commit = review.pr_head_commit(); + let clone_dir = review.clone_dir(); + + for item in small_files { + let filepath = item.filepath.as_str(); + let commit_range = format!("{}...{}", prev_commit, curr_commit); + log::debug!("[extract_hunks] | clone_dir = {:?}, filepath = {:?}", clone_dir, filepath); + let output_res = Command::new("git") + .arg("diff") + .arg("--unified=0") + .arg(&commit_range) + .arg(&filepath) + .current_dir(clone_dir) + .output(); + if output_res.is_err() { + let commanderr = output_res.expect_err("No error in output_res"); + log::error!("[extract_hunks] git diff command failed to start : {:?}", commanderr); + continue; + } + let result = output_res.expect("Uncaught error in output_res"); + let diff = result.stdout; + let diffstr_res = str::from_utf8(&diff); + if diffstr_res.is_err() { + let e = diffstr_res.expect_err("No error in diffstr_res"); + log::error!("[extract_hunks] Unable to deserialize diff: {:?}", e); + continue; + } + let diffstr = diffstr_res.expect("Uncaught error in diffstr_res"); + log::debug!("[extract_hunks] diffstr = {}", &diffstr); + + let mut add_hunks = Vec::new(); + let mut del_hunks = Vec::new(); + + for line in diffstr.lines() { + if line.starts_with("@@") { + let parts: Vec<&str> = line.split_whitespace().collect(); + if parts.len() > 2 { + let del_hunk = parts[1]; + let add_hunk = parts[2]; + + if del_hunk.starts_with('-') { + if let Some((start, len)) = parse_hunk_range(del_hunk) { + let end = start + len - 1; + del_hunks.push((start, end)); + } + } + + if add_hunk.starts_with('+') { + if let Some((start, len)) = parse_hunk_range(add_hunk) { + let end = start + len - 1; + add_hunks.push((start, end)); + } + } + } + } + } + + if !add_hunks.is_empty() { + add_hunks_map.insert(filepath.to_string(), add_hunks); + } + if !del_hunks.is_empty() { + del_hunks_map.insert(filepath.to_string(), del_hunks); + } + } + (add_hunks_map, del_hunks_map) +} + +fn parse_hunk_range(hunk: &str) -> Option<(usize, usize)> { + + let hunk = hunk.trim_start_matches(&['-', '+'][..]); + let parts: Vec<&str> = hunk.split(',').collect(); + if parts.len() == 1 { + if let Ok(start) = parts[0].parse::() { + return Some((start, 1)); + } + } else if parts.len() == 2 { + if let (Ok(start), Ok(len)) = (parts[0].parse::(), parts[1].parse::()) { + return Some((start, len)); + } + } + None +} \ No newline at end of file diff --git a/vibi-dpu/src/llm/mod.rs b/vibi-dpu/src/llm/mod.rs index fab870e3..683c5d65 100644 --- a/vibi-dpu/src/llm/mod.rs +++ b/vibi-dpu/src/llm/mod.rs @@ -1 +1,2 @@ -pub mod utils; \ No newline at end of file +pub mod utils; +pub mod gitops; \ No newline at end of file diff --git a/vibi-dpu/src/llm/utils.rs b/vibi-dpu/src/llm/utils.rs index 80cca9b2..42d82164 100644 --- a/vibi-dpu/src/llm/utils.rs +++ b/vibi-dpu/src/llm/utils.rs @@ -1,71 +1,88 @@ use std::path::Path; use futures_util::StreamExt; +use serde::{Deserialize, Serialize}; +use serde_json::json; use std::fs; use crate::utils::reqwest_client::get_client; +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmResponse { + model: String, + created_at: String, + response: String, + done: bool +} + pub async fn call_llm_api(prompt: String) -> Option { let client = get_client(); - let url = "https://your-llm-api-endpoint.com"; - + let url = "http://35.244.9.107/api/generate"; + log::debug!("[call_llm_api] Prompt = {:?}", &prompt); let response_res = client.post(url) - .json(&serde_json::json!({"model": "phind-codellama", "prompt": prompt})) + .json(&json!({"model": "phind-codellama", "prompt": prompt})) .send() .await; - if response_res.is_err() { - let err = response_res.expect_err("No error in response_res"); + + if let Err(err) = response_res { log::error!("[call_llm_api] Error in calling api: {:?}", err); return None; } - let response = response_res.expect("Uncaught error in response_res"); + + let response = response_res.unwrap(); let mut final_response = String::new(); + let resp_text_res = response.text().await; + if resp_text_res.is_err() { + let e = resp_text_res.expect_err("Empty error in resp_text_res"); + log::error!("[call_llm_api] Error while deserializing response to text: {:?}", e); + return None; + } + let resp_text = resp_text_res.expect("Uncaught error in resp_text"); + // Split the string by the sequence "}\n{" + let split_seq = "}\n{"; + let mut chunks = Vec::new(); + let mut start = 0; + while let Some(pos) = &resp_text[start..].find(split_seq) { + let end = start + pos + 1; + chunks.push(&resp_text[start..end]); + start = end + 1; + } - let mut stream = response.bytes_stream(); - while let Some(item_res) = stream.next().await { - if item_res.is_err() { - let err = item_res.expect_err("Empty error in item_res"); - log::error!("[call_llm_api] Error in parsing stream {:?}", err); - return None; + log::debug!("[call_llm_api] chunks = {:?}", &chunks); + for chunk in chunks { + let parsed_chunk_res = serde_json::from_str(&chunk); + if parsed_chunk_res.is_err() { + let e = parsed_chunk_res.expect_err("Empty error in parsed_chunk_res"); + log::error!("[call_llm_api] Unable to deserialize {}: {:?}", chunk, e); + continue; + } + let parsed_chunk: LlmResponse = parsed_chunk_res.expect("Uncaught error in parsed_chunk_res"); + final_response.push_str(&parsed_chunk.response); + if parsed_chunk.done { + break; } - let item = item_res.expect("Empty item_res"); - let chunk = item; - - final_response.push_str(&String::from_utf8_lossy(&chunk)); } - + log::debug!("[call_llm_api] final_response = {:?}", &final_response); Some(final_response) } -pub fn get_changed_files() -> Option> { - // Replace this with actual logic to get changed files in the PR - let output_res = std::process::Command::new("git") - .args(&["diff", "--name-only", "HEAD^", "HEAD"]) - .output(); - if output_res.is_err() { - let err = output_res.expect_err("Empty error in output_res"); - log::error!("[get_changed_files] Error in getting diff files: {:?}", err); +pub fn read_file(file: &str) -> Option { + log::error!("[read_file] file name = {}", &file); + let path = Path::new(file); + let content_res = fs::read_to_string(path); + if !path.exists() { + log::error!("[read_file] Path does not exist: {:?}", &path); return None; } - let output = output_res.expect("Uncaught error in output_res"); - let files = String::from_utf8_lossy(&output.stdout); - Some(files.lines().map(String::from).collect()) -} - -pub fn read_files(files: Vec) -> Option { - let mut content = String::new(); - - for file in files { - let path = Path::new(&file); - let content_res = fs::read_to_string(path); - if path.exists() { - if content_res.is_err() { - return None; - } - content = content_res.expect("Empty content_res"); - content.push('\n'); - } + if content_res.is_err() { + let err = content_res.expect_err("Empty error in content_res"); + log::error!("[read_file] Error in reading content: {:?}", err); + return None; } - + let content = content_res.expect("Empty content_res"); Some(content) +} + +pub fn parse_llm_response(llm_response: &str) -> Option { + return None; } \ No newline at end of file diff --git a/vibi-dpu/src/utils/gitops.rs b/vibi-dpu/src/utils/gitops.rs index d6499c43..a2411dee 100644 --- a/vibi-dpu/src/utils/gitops.rs +++ b/vibi-dpu/src/utils/gitops.rs @@ -17,7 +17,7 @@ use crate::utils::repo::Repository; #[derive(Debug, Serialize, Default, Deserialize)] pub struct StatItem { - filepath: String, + pub filepath: String, additions: i32, deletions: i32, } From 101e01e570bb45e654b1f01412658804cfdc24fb Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Fri, 5 Jul 2024 02:03:52 +0530 Subject: [PATCH 04/43] Implement prompt formatting and llm calling with blocks of code --- vibi-dpu/src/core/relevance.rs | 34 ++++- vibi-dpu/src/llm/function_info.rs | 200 ++++++++++++++++++++++++++++++ vibi-dpu/src/llm/mod.rs | 3 +- vibi-dpu/src/llm/utils.rs | 21 +++- 4 files changed, 253 insertions(+), 5 deletions(-) create mode 100644 vibi-dpu/src/llm/function_info.rs diff --git a/vibi-dpu/src/core/relevance.rs b/vibi-dpu/src/core/relevance.rs index a142792b..44e7d8fd 100644 --- a/vibi-dpu/src/core/relevance.rs +++ b/vibi-dpu/src/core/relevance.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; -use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, llm::{gitops::get_changed_files, utils::{call_llm_api, parse_llm_response, read_file}}, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; +use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, llm::{function_info::{extract_function_calls, extract_function_import_path, extract_function_lines}, gitops::get_changed_files, utils::{call_llm_api, parse_llm_response, read_file}}, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; use crate::utils::review::Review; use crate::utils::repo_config::RepoConfig; @@ -237,7 +237,7 @@ async fn comment_text(relevance_vec: &Vec, auto_assign: bool, pub async fn mermaid_comment(small_files: &Vec, review: &Review) -> Option { let (file_lines_del_map, file_lines_add_map) = get_changed_files(small_files, review); let files: Vec = small_files.iter().map(|item| item.filepath.clone()).collect(); - let system_prompt_opt = read_file("/app/prompt"); + let system_prompt_opt = read_file("/app/prompt_function_lines"); if system_prompt_opt.is_none() { log::error!("[mermaid_comment] Unable to read system prompt"); return None; @@ -251,7 +251,7 @@ pub async fn mermaid_comment(small_files: &Vec, review: &Review) -> Op } match read_file(&file_path) { None => { - log::error!("[mermaid_comment] Failed to read changed files:"); + log::error!("[mermaid_comment] Failed to read changed files:{}", &file_path); return None; } Some(file_contents) => { @@ -261,11 +261,39 @@ pub async fn mermaid_comment(small_files: &Vec, review: &Review) -> Op .map(|(index, line)| format!("{} {}", index + 1, line)) .collect::>() .join("\n"); + let flinemap_opt = extract_function_lines( + &numbered_content, + &system_prompt, + &file + ).await; + if flinemap_opt.is_none() { + log::debug!( + "[mermaid_comment] Unable to generate function line map for file: {}", &file); + continue; + } + let flinemap = flinemap_opt.expect("Empty flinemap_opt"); let call_stack_del_opt = process_call_stack_changes(&system_prompt, &numbered_content, &file_lines_del_map, &file ).await; + // deleted lines + let del_lines = &file_lines_del_map[&file]; + let called_funcs_opt = extract_function_calls( + del_lines, + &numbered_content, + &file + ).await; + if called_funcs_opt.is_none() { + log::error!("[mermaid_comment] Unable to get called functions for file: {}", &file); + continue; + } + let called_funcs = called_funcs_opt.expect("Empty called_funcs_opt"); + let called_func_paths = extract_function_import_path( + &called_funcs, + &numbered_content, + &file + ).await; // let call_stack_add_opt = process_call_stack_changes(&system_prompt, // &numbered_content, // &file_lines_add_map, diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/llm/function_info.rs new file mode 100644 index 00000000..3378b87f --- /dev/null +++ b/vibi-dpu/src/llm/function_info.rs @@ -0,0 +1,200 @@ +use serde::{Deserialize, Serialize}; + +use super::utils::{call_llm_api, get_specific_lines, read_file}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FunctionLineMap { + name: String, + line_start: i32, + line_end: i32, + inside: String +} + +impl FunctionLineMap { + pub fn new(name: &str, line_start: i32, line_end: i32, inside: &str) -> Self { + FunctionLineMap { + name: name.to_string(), + line_start, + line_end, + inside: inside.to_string(), + } + } +} + +pub async fn extract_function_lines(numbered_content: &str, system_prompt: &str, file_name: &str) -> Option> { + let mut flines = Vec::::new(); + // split numbered content and start for loop + // Split the numbered_content into lines + let lines: Vec<&str> = numbered_content.lines().collect(); + + // Determine the batch size + let batch_size = 30; + + // Iterate over the lines in chunks of batch_size + for chunk in lines.chunks(batch_size) { + // create prompt + // call llm api + let prompt = format!( + "{}\n\n### User Message\nInput -\n{}\n{}\n\nOutput -", + system_prompt, + file_name, + chunk.join("\n") + ); + match call_llm_api(prompt).await { + None => { + log::error!("[mermaid_comment] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + // parse response to FunctionLineMap + let flinemap_res = serde_json::from_str(&llm_response); + if flinemap_res.is_err() { + let e = flinemap_res.expect_err("Empty error in flinemap_res"); + log::error!( + "[extract_function_lines] Unable to deserialize llm response: {:?}, error - {:?}", + &llm_response, e); + continue; + } + let flinemap = flinemap_res.expect("Uncaught error in flinemap_res"); + // add to vec + flines.push(flinemap); + } + } + } + if flines.is_empty() { + log::error!("[extract_function_lines] No functions extracted"); + return None; + } + let parsed_flines = process_flinemap_response(&flines); + return Some(parsed_flines); +} + +fn process_flinemap_response(flines: &Vec) -> Vec { + let mut resolved_flines = vec![]; + let mut unfinished_function = FunctionLineMap::new("", 0, 0, ""); + for flinemap in flines { + if flinemap.line_end == -1 { + unfinished_function = flinemap.clone(); + continue; + } + if flinemap.name == "unknown" { + if unfinished_function.line_end == -1 { + unfinished_function.line_end = flinemap.line_start; + resolved_flines.push(unfinished_function.clone()); + continue; + } + } + resolved_flines.push(flinemap.to_owned()); + } + + return resolved_flines; +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct CalledFunction { + name: String, + line: usize +} + +pub async fn extract_function_calls(hunk_lines: &Vec<(usize, usize)>, numbered_content: &str, file_name: &str) -> Option> { + // extract hunk lines from numbered content + let user_prompt = get_specific_lines( + hunk_lines.to_owned(), numbered_content); + // prepare prompt and call llm api + let system_prompt_opt = read_file("/app/prompt_function_calls"); + if system_prompt_opt.is_none() { + log::error!("[extract_function_calls] Unable to read system prompt /app/prompt_function_calls"); + return None; + } + let system_prompt = system_prompt_opt.expect("Empty system_prompt_opt"); + let prompt = format!( + "{}\n\n### User Message\nInput -\n{}\n{}\n\nOutput -", + &system_prompt, + file_name, + &user_prompt + ); + match call_llm_api(prompt).await { + None => { + log::error!("[extract_function_calls] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + // parse response and return CalledFunction Vec + // optional - paginate + let called_functions_res = serde_json::from_str(&llm_response); + if called_functions_res.is_err() { + let e = called_functions_res.expect_err("Empty error in called_functions_res"); + log::error!( + "[extract_function_calls] Unable to deserialize called_functions: {:?}", e); + return None; + } + let called_functions: Vec = called_functions_res.expect("Uncaught error in called_functions_res"); + return Some(called_functions); + } + } +} + +#[derive(Debug, Default, Deserialize, Clone)] +pub struct CalledFunctionPath { + path: String, + line: u32 +} +pub async fn extract_function_import_path(called_funcs: &Vec, numbered_content: &str, file_name: &str) -> Option> { + let system_prompt_opt = read_file("/app/prompt_function_call_path"); + if system_prompt_opt.is_none() { + log::error!("[extract_function_calls] Unable to read system prompt /app/prompt_function_calls"); + return None; + } + let system_prompt = system_prompt_opt.expect("Empty system_prompt_opt"); + let mut user_prompt = String::new(); + // search in numbered content for called functions + let numbered_lines: Vec<&str> = numbered_content.lines().collect(); + for called_func in called_funcs { + // extract hunk lines from numbered content or get it as input + let first_occurence_line_opt = find_first_occurence(&numbered_lines, &called_func.name); + if first_occurence_line_opt.is_none() { + log::debug!("[extract_function_import_path] No first occurence found for: {}", &called_func.name); + continue; + } + let first_occurence_line = first_occurence_line_opt.expect("Empty first_occurence_line_opt"); + user_prompt.push_str(first_occurence_line.as_str()); + user_prompt.push_str("\n"); + user_prompt.push_str(numbered_lines[called_func.line]); + user_prompt.push_str("\n"); + } + // prepare prompt with hunk lines and occurences and call llm api + let prompt = format!( + "{}\n\n### User Message\nInput -\n{}\n{}\n\nOutput -", + &system_prompt, + file_name, + &user_prompt + ); + // extract CalledFunctionPath vec from responses and return + match call_llm_api(prompt).await { + None => { + log::error!("[extract_function_import_path] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + let called_functions_res = serde_json::from_str(&llm_response); + if called_functions_res.is_err() { + let e = called_functions_res.expect_err("Empty error in called_functions_res"); + log::error!( + "[extract_function_calls] Unable to deserialize called_functions: {:?}", e); + return None; + } + let called_func_paths: Vec = called_functions_res.expect("Uncaught error in called_functions_res"); + return Some(called_func_paths); + } + } + // optional - paginate +} + +fn find_first_occurence(lines: &Vec<&str>, func_name: &str) -> Option { + for line in lines { + if line.contains(func_name) { + return Some(line.to_owned().to_owned()); + } + } + return None; +} \ No newline at end of file diff --git a/vibi-dpu/src/llm/mod.rs b/vibi-dpu/src/llm/mod.rs index 683c5d65..3561768b 100644 --- a/vibi-dpu/src/llm/mod.rs +++ b/vibi-dpu/src/llm/mod.rs @@ -1,2 +1,3 @@ pub mod utils; -pub mod gitops; \ No newline at end of file +pub mod gitops; +pub mod function_info; \ No newline at end of file diff --git a/vibi-dpu/src/llm/utils.rs b/vibi-dpu/src/llm/utils.rs index 42d82164..33c2b1f3 100644 --- a/vibi-dpu/src/llm/utils.rs +++ b/vibi-dpu/src/llm/utils.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::{collections::HashMap, path::Path}; use futures_util::StreamExt; use serde::{Deserialize, Serialize}; @@ -85,4 +85,23 @@ pub fn read_file(file: &str) -> Option { pub fn parse_llm_response(llm_response: &str) -> Option { return None; +} + +pub fn get_specific_lines(line_numbers: Vec<(usize, usize)>, numbered_content: &str) -> String { + // Split the input content into lines and collect into a vector + let lines: Vec<&str> = numbered_content.lines().collect(); + let mut result = String::new(); + + // Iterate over each line number we are interested in + for (start, end) in line_numbers { + for line_number in start..=end { + // Check if the line_number is within the bounds of the vector + if line_number < lines.len() { + result.push_str(lines[line_number]); + result.push('\n'); + } + } + } + + return result; } \ No newline at end of file From 9442eb5e3df276d1fa40b60da09d3e9a6422a218 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sun, 7 Jul 2024 00:04:45 +0530 Subject: [PATCH 05/43] modularised code and added elemets to generate mermaid components --- vibi-dpu/src/core/relevance.rs | 126 ++--------------- vibi-dpu/src/llm/function_info.rs | 23 +-- vibi-dpu/src/llm/mermaid_elements.rs | 201 +++++++++++++++++++++++++++ vibi-dpu/src/llm/mod.rs | 3 +- 4 files changed, 228 insertions(+), 125 deletions(-) create mode 100644 vibi-dpu/src/llm/mermaid_elements.rs diff --git a/vibi-dpu/src/core/relevance.rs b/vibi-dpu/src/core/relevance.rs index 44e7d8fd..d0206fb0 100644 --- a/vibi-dpu/src/core/relevance.rs +++ b/vibi-dpu/src/core/relevance.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; -use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, llm::{function_info::{extract_function_calls, extract_function_import_path, extract_function_lines}, gitops::get_changed_files, utils::{call_llm_api, parse_llm_response, read_file}}, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; +use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, llm::mermaid_elements::generate_mermaid_flowchart, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; use crate::utils::review::Review; use crate::utils::repo_config::RepoConfig; @@ -235,123 +235,17 @@ async fn comment_text(relevance_vec: &Vec, auto_assign: bool, } pub async fn mermaid_comment(small_files: &Vec, review: &Review) -> Option { - let (file_lines_del_map, file_lines_add_map) = get_changed_files(small_files, review); - let files: Vec = small_files.iter().map(|item| item.filepath.clone()).collect(); - let system_prompt_opt = read_file("/app/prompt_function_lines"); - if system_prompt_opt.is_none() { - log::error!("[mermaid_comment] Unable to read system prompt"); + let flowchart_str_opt = generate_mermaid_flowchart(small_files, review).await; + if flowchart_str_opt.is_none() { + log::error!("[mermaid_comment] Unable to generate flowchart for review: {}", review.id()); return None; } - let system_prompt = system_prompt_opt.expect("Empty system_prompt_opt"); - for file in files { - let file_path = format!("{}/{}", review.clone_dir(), &file); - if !file.ends_with(".rs") { - log::debug!("[mermaid_comment] File extension not valid: {}", &file); - continue; - } - match read_file(&file_path) { - None => { - log::error!("[mermaid_comment] Failed to read changed files:{}", &file_path); - return None; - } - Some(file_contents) => { - let numbered_content = file_contents - .lines() - .enumerate() - .map(|(index, line)| format!("{} {}", index + 1, line)) - .collect::>() - .join("\n"); - let flinemap_opt = extract_function_lines( - &numbered_content, - &system_prompt, - &file - ).await; - if flinemap_opt.is_none() { - log::debug!( - "[mermaid_comment] Unable to generate function line map for file: {}", &file); - continue; - } - let flinemap = flinemap_opt.expect("Empty flinemap_opt"); - let call_stack_del_opt = process_call_stack_changes(&system_prompt, - &numbered_content, - &file_lines_del_map, - &file - ).await; - // deleted lines - let del_lines = &file_lines_del_map[&file]; - let called_funcs_opt = extract_function_calls( - del_lines, - &numbered_content, - &file - ).await; - if called_funcs_opt.is_none() { - log::error!("[mermaid_comment] Unable to get called functions for file: {}", &file); - continue; - } - let called_funcs = called_funcs_opt.expect("Empty called_funcs_opt"); - let called_func_paths = extract_function_import_path( - &called_funcs, - &numbered_content, - &file - ).await; - // let call_stack_add_opt = process_call_stack_changes(&system_prompt, - // &numbered_content, - // &file_lines_add_map, - // &file - // ).await; - // if call_stack_add_opt.is_none() || call_stack_del_opt.is_none() { - // log::error!("[mermaid_comment] Unable to generate call stacks for added and deleted lines"); - // return None; - // } - let call_stack_del = call_stack_del_opt.expect("Empty call_stack_del_opt"); - // let call_stack_add = call_stack_add_opt.expect("Empty call_stack_add"); - let mermaid_comment = format!( - "### Call Stack Diff\nDeletions - \n```mermaid\n{}\n```", - call_stack_del, - // call_stack_add - ); - return Some(mermaid_comment); - } - } - } - return None; -} - -async fn process_call_stack_changes(system_prompt: &str, - numbered_content: &str, - file_lines_map: &HashMap>, - file: &str -) -> Option { - let lines_vec = &file_lines_map[file]; - log::debug!("[process_call_stack_changes] lines_vec = {:?}", &lines_vec); - for (line_start, line_end) in lines_vec { - log::debug!("[process_call_stack_changes] line start, end = {}, {}", &line_start, &line_end); - let prompt = format!( - "{}\n\n### User Message\nInput -\n{}\n{}\nLine Start - {}\nLine End - {}\n\nOutput -", - system_prompt, - &file, - numbered_content, - line_start, - line_end - ); - match call_llm_api(prompt).await { - None => { - log::error!("[mermaid_comment] Failed to call LLM API"); - return None; - } - Some(llm_response) => { - // let mermaid_response_opt = parse_llm_response(&llm_response); - // if mermaid_response_opt.is_none() { - // log::error!("[process_call_stack_changes] Unable to parse llm response"); - // return None; - // } - // let mermaid_response = mermaid_response_opt.expect("Empty mermaid_response_opt"); - // return Some(mermaid_response); - return None; - } - } - } - return None; + let flowchart_str = flowchart_str_opt.expect("Empty flowchart_str_opt"); + let mermaid_comment = format!( + "### Call Stack Diff\n```mermaid\n{}\n```", + flowchart_str, + ); + return Some(mermaid_comment); } pub fn deduplicated_relevance_vec_for_comment(relevance_vec: &Vec) -> (HashMap, f32>, Vec) { diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/llm/function_info.rs index 3378b87f..38a3c64c 100644 --- a/vibi-dpu/src/llm/function_info.rs +++ b/vibi-dpu/src/llm/function_info.rs @@ -4,10 +4,10 @@ use super::utils::{call_llm_api, get_specific_lines, read_file}; #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionLineMap { - name: String, - line_start: i32, - line_end: i32, - inside: String + pub name: String, + pub line_start: i32, + pub line_end: i32, + pub inside: String } impl FunctionLineMap { @@ -21,7 +21,13 @@ impl FunctionLineMap { } } -pub async fn extract_function_lines(numbered_content: &str, system_prompt: &str, file_name: &str) -> Option> { +pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> Option> { + let system_prompt_opt = read_file("/app/prompt_function_lines"); + if system_prompt_opt.is_none() { + log::error!("[mermaid_comment] Unable to read system prompt"); + return None; + } + let system_prompt = system_prompt_opt.expect("Empty system_prompt_opt"); let mut flines = Vec::::new(); // split numbered content and start for loop // Split the numbered_content into lines @@ -92,8 +98,8 @@ fn process_flinemap_response(flines: &Vec) -> Vec, numbered_content: &str, file_name: &str) -> Option> { @@ -136,7 +142,8 @@ pub async fn extract_function_calls(hunk_lines: &Vec<(usize, usize)>, numbered_c #[derive(Debug, Default, Deserialize, Clone)] pub struct CalledFunctionPath { - path: String, + pub path: String, + pub function_name: String, line: u32 } pub async fn extract_function_import_path(called_funcs: &Vec, numbered_content: &str, file_name: &str) -> Option> { diff --git a/vibi-dpu/src/llm/mermaid_elements.rs b/vibi-dpu/src/llm/mermaid_elements.rs new file mode 100644 index 00000000..6a977029 --- /dev/null +++ b/vibi-dpu/src/llm/mermaid_elements.rs @@ -0,0 +1,201 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use crate::utils::{gitops::StatItem, review::Review}; + +use super::{function_info::{extract_function_calls, extract_function_import_path, extract_function_lines, CalledFunction, CalledFunctionPath, FunctionLineMap}, gitops::get_changed_files, utils::read_file}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct MermaidSubgraph { + subgraph_str: Option, + nodes: HashMap +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct MermaidNode { + node_str: Option, + function_name: String +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct MermaidEdge { + edge_str: Option, + caller_function: String, + called_function: String, + color: String +} + +pub async fn generate_mermaid_flowchart(small_files: &Vec, review: &Review) -> Option { + let flowchart_content_res = generate_flowchart_elements(small_files, review).await; + if flowchart_content_res.is_none() { + log::error!("[generate_mermaid_flowchart] Unable to generate flowchart content, review: {}", review.id()); + return None; + } + let flowchart_content = flowchart_content_res.expect("Empty flowchart_content_res"); + let flowchart_str = format!( + "%%{{init: {{\"flowchart\": {{\"htmlLabels\": false}}}} }}%%\nflowchart LR{}\n", + &flowchart_content + ); + return Some(flowchart_str); +} + +async fn generate_flowchart_elements(small_files: &Vec, review: &Review) -> Option { + let (file_lines_del_map, file_lines_add_map) = get_changed_files(small_files, review); + let subgraph_map = HashMap::::new(); + let mut edges_vec = Vec::::new(); + let files: Vec = small_files.iter().map(|item| item.filepath.clone()).collect(); + for file in files { + generate_mermaid_content( + &subgraph_map, + review, + &file, + &file_lines_del_map, + &file_lines_add_map, + &mut edges_vec, + ).await; + } + // Render content string + return None; +} + +async fn generate_mermaid_content( + subgraph_map: &HashMap, review: &Review, file: &str, + file_lines_del_map: &HashMap>, + file_lines_add_map: &HashMap>, + edges_vec: &mut Vec +) { + if !file.ends_with(".rs") { + log::debug!("[mermaid_comment] File extension not valid: {}", &file); + return; + } + let file_path = format!("{}/{}", review.clone_dir(), &file); + let file_contents_res = read_file(&file_path); + if file_contents_res.is_none() { + log::error!( + "[generate_mermaid_content] Unable to read changed file content: {}", &file_path); + return; + } + let file_contents = file_contents_res.expect("Empty file_contents_res"); + let numbered_content = file_contents + .lines() + .enumerate() + .map(|(index, line)| format!("{} {}", index + 1, line)) + .collect::>() + .join("\n"); + let flinemap_opt = extract_function_lines( + &numbered_content, + file + ).await; + if flinemap_opt.is_none() { + log::debug!( + "[generate_mermaid_content] Unable to generate function line map for file: {}", file); + return; + } + let flinemap = flinemap_opt.expect("Empty flinemap_opt"); + // deleted lines + let called_info_del_opt = generate_called_function_info( + file_lines_del_map, &numbered_content, file).await; + if called_info_del_opt.is_none() { + log::error!("[generate_mermaid_content] Unable to generate called functions info"); + return; + } + let (called_funcs_del, called_func_paths_del) = called_info_del_opt.expect("Empty called_info_opt"); + generate_callee_nodes(&called_func_paths_del, subgraph_map); + generate_caller_elements(file, &file_lines_del_map[file], &flinemap, &called_funcs_del, edges_vec, "red"); + // added lines + let called_info_del_opt = generate_called_function_info( + file_lines_add_map, &numbered_content, file).await; + if called_info_del_opt.is_none() { + log::error!("[generate_mermaid_content] Unable to generate called functions info"); + return; + } + let (called_funcs_add, called_func_paths_add) = called_info_del_opt.expect("Empty called_info_opt"); + generate_callee_nodes(&called_func_paths_add, subgraph_map); + generate_caller_elements(file, &file_lines_del_map[file], &flinemap, &called_funcs_del, edges_vec, "green"); + return; +} + +fn generate_caller_elements(filename: &str, + hunk_lines: &Vec<(usize, usize)>, + flinemap: &Vec, + called_funcs_del: &Vec, edges_vec: &mut Vec, color: &str + ) +{ + let mut relevant_funcs = Vec::::new(); + for cf in called_funcs_del { + let func_name_opt = get_func_from_line(hunk_lines, cf.line, flinemap); + if func_name_opt.is_none() { + log::debug!("[generate_caller_elements] Unable to get func name for line: {:?}", cf.line); + continue; + } + let func_name = func_name_opt.expect("Empty func_name_opt"); + relevant_funcs.push(func_name.clone()); + edges_vec.push(MermaidEdge{ + edge_str: None, + caller_function: func_name, + called_function: cf.name.to_string(), + color: color.to_string() + }) + } + for rf in relevant_funcs { + // Add mermaid node for func in correct mermaid subgraph + } +} + +fn get_func_from_line(hunk_lines: &[(usize, usize)], line: usize, flinemaps: &[FunctionLineMap]) -> Option { + for flinemap in flinemaps { + if flinemap.line_start >= line as i32 && flinemap.line_end <= line as i32 { + return Some(flinemap.name.to_string()); + } + } + return None; +} + +fn generate_callee_nodes( + called_funcs_path: &[CalledFunctionPath], + subgraph_map: &HashMap) +{ + for cfp in called_funcs_path { + if let Some(subgraph) = subgraph_map.to_owned().get_mut(&cfp.path) { + subgraph.nodes.insert( + cfp.function_name.to_string(), + MermaidNode { node_str: None, function_name: cfp.function_name.to_string()} + ); + } else { + // Create new subgraph + // Create new node + // Add to subgraph_map + } + } + return; +} + +async fn generate_called_function_info(file_lines_map: &HashMap>, + numbered_content: &str, filename: &str +) + -> Option<(Vec, Vec)> +{ + let del_lines = &file_lines_map[filename]; + let called_funcs_opt = extract_function_calls( + del_lines, + &numbered_content, + filename + ).await; + if called_funcs_opt.is_none() { + log::error!("[generate_called_function_info] Unable to get called functions for file: {}", filename); + return None; + } + let called_funcs = called_funcs_opt.expect("Empty called_funcs_opt"); + let called_func_paths_opt = extract_function_import_path( + &called_funcs, + &numbered_content, + filename + ).await; + if called_func_paths_opt.is_none() { + log::error!("[generate_called_function_info] Unable to get called functions for file: {}", filename); + return None; + } + let called_func_paths = called_func_paths_opt.expect("Empty called_func_paths_opt"); + return Some((called_funcs, called_func_paths)); +} \ No newline at end of file diff --git a/vibi-dpu/src/llm/mod.rs b/vibi-dpu/src/llm/mod.rs index 3561768b..7226af6e 100644 --- a/vibi-dpu/src/llm/mod.rs +++ b/vibi-dpu/src/llm/mod.rs @@ -1,3 +1,4 @@ pub mod utils; pub mod gitops; -pub mod function_info; \ No newline at end of file +pub mod function_info; +pub mod mermaid_elements; \ No newline at end of file From bbc74a8590b9725c51638669e10b4c0b4ba67195 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sun, 7 Jul 2024 23:26:01 +0530 Subject: [PATCH 06/43] Add comment generating functions --- vibi-dpu/src/llm/elements.rs | 187 +++++++++++++++++++++++++++ vibi-dpu/src/llm/mermaid_elements.rs | 113 ++++++++-------- vibi-dpu/src/llm/mod.rs | 3 +- vibi-dpu/src/llm/utils.rs | 14 ++ 4 files changed, 264 insertions(+), 53 deletions(-) create mode 100644 vibi-dpu/src/llm/elements.rs diff --git a/vibi-dpu/src/llm/elements.rs b/vibi-dpu/src/llm/elements.rs new file mode 100644 index 00000000..06ae3440 --- /dev/null +++ b/vibi-dpu/src/llm/elements.rs @@ -0,0 +1,187 @@ +use std::{borrow::BorrowMut, collections::HashMap}; +use serde::{Serialize, Deserialize}; + +use super::utils::generate_random_string; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct MermaidSubgraph { + name: String, + nodes: HashMap, + mermaid_id: String +} + +impl MermaidSubgraph { + // Constructor + pub fn new(name: String, nodes: HashMap) -> Self { + let mermaid_id = generate_random_string(4); + Self { name, nodes, mermaid_id } + } + + // Getter for nodes + pub fn nodes(&self) -> &HashMap { + &self.nodes + } + + // Setter for nodes + pub fn set_nodes(&mut self, nodes: HashMap) { + self.nodes = nodes; + } + + pub fn add_node(&mut self, node: MermaidNode) { + self.nodes.insert(node.function_name.to_string(), node); + } + + pub fn render_subgraph(&self) -> String{ + let mut all_nodes = Vec::new(); + for (_, node) in self.nodes() { + all_nodes.push(node.render_node()); + } + let subgraph_str = format!( + "\tsubgraph {} [{}]\n{}\t\tend\n", + self.mermaid_id, + self.name, + all_nodes.join("\n") + ); + // self.set_subgraph_str(Some(subgraph_str)); + return subgraph_str; + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct MermaidNode { + function_name: String, + mermaid_id: String, +} + +impl MermaidNode { + // Constructor + pub fn new( function_name: String) -> Self { + let mermaid_id = generate_random_string(4); + Self { mermaid_id, function_name } + } + + // Getter for function_name + pub fn function_name(&self) -> &String { + &self.function_name + } + + // Getter for mermaid_id + pub fn mermaid_id(&self) -> &String { + &self.mermaid_id + } + + // Setter for function_name + pub fn set_function_name(&mut self, function_name: String) { + self.function_name = function_name; + } + + pub fn render_node(&self) -> String { + let node_str = format!("\t{}[{}]", &self.mermaid_id, &self.function_name); + // self.set_node_str(Some(node_str.clone())); + return node_str; + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct MermaidEdge { + line: usize, + caller_function: MermaidNode, + called_function: MermaidNode, + color: String, +} + +impl MermaidEdge { + // Constructor + pub fn new(line: usize, caller_function: MermaidNode, called_function: MermaidNode, color: String) -> Self { + Self { line, caller_function, called_function, color } + } + + // Getter for edge_str + pub fn line(&self) -> usize { + self.line + } + + // Getter for caller_function + pub fn caller_function(&self) -> &MermaidNode { + &self.caller_function + } + + // Setter for caller_function + pub fn set_caller_function(&mut self, caller_function: MermaidNode) { + self.caller_function = caller_function; + } + + // Getter for called_function + pub fn called_function(&self) -> &MermaidNode { + &self.called_function + } + + // Setter for called_function + pub fn set_called_function(&mut self, called_function: MermaidNode) { + self.called_function = called_function; + } + + // Getter for color + pub fn color(&self) -> &String { + &self.color + } + + // Setter for color + pub fn set_color(&mut self, color: String) { + self.color = color; + } + + pub fn render_edge_definition(&self) -> String { + let edge_str = format!( + "\t{} -- Line {} --> {}\n", + self.caller_function().mermaid_id(), + self.line, + self.called_function().mermaid_id(), + ); + return edge_str; + } + + pub fn render_edge_style(&self) -> String { + let style_str = format!( + "stroke:{},stroke-width:4px;", + self.color() + ); + return style_str; + } +} + + +pub struct MermaidEdges { + all_edges: Vec, +} + +impl MermaidEdges { + pub fn new(all_edges: Vec) -> Self { + MermaidEdges {all_edges } + } + + pub fn all_edges(&self) -> &Vec { + return &self.all_edges; + } + + pub fn add_edge(&mut self, edge: MermaidEdge) { + self.all_edges.push(edge); + } + + pub fn render_edges(&self) -> String { + let mut all_edges = Vec::::new(); + let mut all_edges_style = Vec::::new(); + for (idx, edge) in self.all_edges().iter().enumerate() { + all_edges.push(edge.render_edge_definition()); + all_edges_style.push( + format!("\tlinkStyle {} {}", idx, edge.render_edge_style()) + ); + } + let all_edges_str = format!( + "{}{}", + all_edges.join("\n"), + all_edges_style.join("\n") + ); + return all_edges_str; + } +} \ No newline at end of file diff --git a/vibi-dpu/src/llm/mermaid_elements.rs b/vibi-dpu/src/llm/mermaid_elements.rs index 6a977029..35c3c74c 100644 --- a/vibi-dpu/src/llm/mermaid_elements.rs +++ b/vibi-dpu/src/llm/mermaid_elements.rs @@ -1,30 +1,8 @@ use std::collections::HashMap; -use serde::{Deserialize, Serialize}; - use crate::utils::{gitops::StatItem, review::Review}; -use super::{function_info::{extract_function_calls, extract_function_import_path, extract_function_lines, CalledFunction, CalledFunctionPath, FunctionLineMap}, gitops::get_changed_files, utils::read_file}; - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct MermaidSubgraph { - subgraph_str: Option, - nodes: HashMap -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct MermaidNode { - node_str: Option, - function_name: String -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct MermaidEdge { - edge_str: Option, - caller_function: String, - called_function: String, - color: String -} +use super::{elements::{MermaidEdge, MermaidEdges, MermaidNode, MermaidSubgraph}, function_info::{extract_function_calls, extract_function_import_path, extract_function_lines, CalledFunction, CalledFunctionPath, FunctionLineMap}, gitops::get_changed_files, utils::read_file}; pub async fn generate_mermaid_flowchart(small_files: &Vec, review: &Review) -> Option { let flowchart_content_res = generate_flowchart_elements(small_files, review).await; @@ -42,12 +20,12 @@ pub async fn generate_mermaid_flowchart(small_files: &Vec, review: &Re async fn generate_flowchart_elements(small_files: &Vec, review: &Review) -> Option { let (file_lines_del_map, file_lines_add_map) = get_changed_files(small_files, review); - let subgraph_map = HashMap::::new(); - let mut edges_vec = Vec::::new(); + let mut subgraph_map = HashMap::::new(); + let mut edges_vec = MermaidEdges::new(Vec::::new()); let files: Vec = small_files.iter().map(|item| item.filepath.clone()).collect(); for file in files { generate_mermaid_content( - &subgraph_map, + &mut subgraph_map, review, &file, &file_lines_del_map, @@ -60,10 +38,10 @@ async fn generate_flowchart_elements(small_files: &Vec, review: &Revie } async fn generate_mermaid_content( - subgraph_map: &HashMap, review: &Review, file: &str, + subgraph_map: &mut HashMap, review: &Review, file: &str, file_lines_del_map: &HashMap>, file_lines_add_map: &HashMap>, - edges_vec: &mut Vec + edges: &mut MermaidEdges ) { if !file.ends_with(".rs") { log::debug!("[mermaid_comment] File extension not valid: {}", &file); @@ -102,7 +80,17 @@ async fn generate_mermaid_content( } let (called_funcs_del, called_func_paths_del) = called_info_del_opt.expect("Empty called_info_opt"); generate_callee_nodes(&called_func_paths_del, subgraph_map); - generate_caller_elements(file, &file_lines_del_map[file], &flinemap, &called_funcs_del, edges_vec, "red"); + let file_subgraph = MermaidSubgraph::new( + file.to_string(), HashMap::::new()); + generate_caller_elements( + subgraph_map, + &file_lines_del_map[file], + &flinemap, + &called_funcs_del, + &called_func_paths_del, + &file_subgraph, + edges, + "red"); // added lines let called_info_del_opt = generate_called_function_info( file_lines_add_map, &numbered_content, file).await; @@ -112,38 +100,53 @@ async fn generate_mermaid_content( } let (called_funcs_add, called_func_paths_add) = called_info_del_opt.expect("Empty called_info_opt"); generate_callee_nodes(&called_func_paths_add, subgraph_map); - generate_caller_elements(file, &file_lines_del_map[file], &flinemap, &called_funcs_del, edges_vec, "green"); + generate_caller_elements( + subgraph_map, + &file_lines_del_map[file], + &flinemap, + &called_funcs_add, + &called_func_paths_add, + &file_subgraph, + edges, + "green"); + subgraph_map.insert(file.to_string(), file_subgraph); return; } -fn generate_caller_elements(filename: &str, +fn generate_caller_elements(subgraph_map: &HashMap, hunk_lines: &Vec<(usize, usize)>, flinemap: &Vec, - called_funcs_del: &Vec, edges_vec: &mut Vec, color: &str - ) + called_funcs: &Vec, + called_funcs_path: &Vec, + file_subgraph: &MermaidSubgraph, + edges: &mut MermaidEdges, + color: &str) { - let mut relevant_funcs = Vec::::new(); - for cf in called_funcs_del { - let func_name_opt = get_func_from_line(hunk_lines, cf.line, flinemap); + for cf in called_funcs { + let func_name_opt = get_func_from_line(cf.line, flinemap); if func_name_opt.is_none() { log::debug!("[generate_caller_elements] Unable to get func name for line: {:?}", cf.line); continue; } let func_name = func_name_opt.expect("Empty func_name_opt"); - relevant_funcs.push(func_name.clone()); - edges_vec.push(MermaidEdge{ - edge_str: None, - caller_function: func_name, - called_function: cf.name.to_string(), - color: color.to_string() - }) - } - for rf in relevant_funcs { - // Add mermaid node for func in correct mermaid subgraph + let caller_node = match file_subgraph.nodes().get(&func_name) { + Some(node) => node.to_owned(), + None => MermaidNode::new(func_name.clone()) + }; + for cfp in called_funcs_path { + if cf.name == cfp.function_name { + edges.add_edge(MermaidEdge::new( + cf.line, + caller_node.to_owned(), + subgraph_map[&cfp.path].nodes()[&cf.name].to_owned(), + color.to_string() + )); + } + } } } -fn get_func_from_line(hunk_lines: &[(usize, usize)], line: usize, flinemaps: &[FunctionLineMap]) -> Option { +fn get_func_from_line(line: usize, flinemaps: &[FunctionLineMap]) -> Option { for flinemap in flinemaps { if flinemap.line_start >= line as i32 && flinemap.line_end <= line as i32 { return Some(flinemap.name.to_string()); @@ -154,18 +157,24 @@ fn get_func_from_line(hunk_lines: &[(usize, usize)], line: usize, flinemaps: &[F fn generate_callee_nodes( called_funcs_path: &[CalledFunctionPath], - subgraph_map: &HashMap) + subgraph_map: &mut HashMap) { for cfp in called_funcs_path { - if let Some(subgraph) = subgraph_map.to_owned().get_mut(&cfp.path) { - subgraph.nodes.insert( - cfp.function_name.to_string(), - MermaidNode { node_str: None, function_name: cfp.function_name.to_string()} + if let Some(subgraph) = subgraph_map.get_mut(&cfp.path) { + subgraph.add_node( + MermaidNode::new(cfp.function_name.to_string()) ); } else { // Create new subgraph // Create new node // Add to subgraph_map + let mut node_map = HashMap::::new(); + node_map.insert(cfp.function_name.to_string(), MermaidNode::new(cfp.function_name.to_string())); + let subgraph = MermaidSubgraph::new( + cfp.path.to_string(), + node_map + ); + subgraph_map.insert(cfp.path.to_string(), subgraph); } } return; diff --git a/vibi-dpu/src/llm/mod.rs b/vibi-dpu/src/llm/mod.rs index 7226af6e..d816a721 100644 --- a/vibi-dpu/src/llm/mod.rs +++ b/vibi-dpu/src/llm/mod.rs @@ -1,4 +1,5 @@ pub mod utils; pub mod gitops; pub mod function_info; -pub mod mermaid_elements; \ No newline at end of file +pub mod mermaid_elements; +pub mod elements; \ No newline at end of file diff --git a/vibi-dpu/src/llm/utils.rs b/vibi-dpu/src/llm/utils.rs index 33c2b1f3..e8955118 100644 --- a/vibi-dpu/src/llm/utils.rs +++ b/vibi-dpu/src/llm/utils.rs @@ -4,6 +4,8 @@ use futures_util::StreamExt; use serde::{Deserialize, Serialize}; use serde_json::json; use std::fs; +use rand::Rng; + use crate::utils::reqwest_client::get_client; @@ -104,4 +106,16 @@ pub fn get_specific_lines(line_numbers: Vec<(usize, usize)>, numbered_content: & } return result; +} + +pub fn generate_random_string(length: usize) -> String { + const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; + let mut rng = rand::thread_rng(); + let random_string: String = (0..length) + .map(|_| { + let idx = rng.gen_range(0..CHARSET.len()); + CHARSET[idx] as char + }) + .collect(); + random_string } \ No newline at end of file From 3b62d4c2a4b280b3e2687161336d43ecbafef9dd Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sun, 7 Jul 2024 23:38:57 +0530 Subject: [PATCH 07/43] Add code to render all subgraphs and edges and concatenate --- vibi-dpu/src/llm/mermaid_elements.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/vibi-dpu/src/llm/mermaid_elements.rs b/vibi-dpu/src/llm/mermaid_elements.rs index 35c3c74c..5c23fdf3 100644 --- a/vibi-dpu/src/llm/mermaid_elements.rs +++ b/vibi-dpu/src/llm/mermaid_elements.rs @@ -21,7 +21,7 @@ pub async fn generate_mermaid_flowchart(small_files: &Vec, review: &Re async fn generate_flowchart_elements(small_files: &Vec, review: &Review) -> Option { let (file_lines_del_map, file_lines_add_map) = get_changed_files(small_files, review); let mut subgraph_map = HashMap::::new(); - let mut edges_vec = MermaidEdges::new(Vec::::new()); + let mut edges = MermaidEdges::new(Vec::::new()); let files: Vec = small_files.iter().map(|item| item.filepath.clone()).collect(); for file in files { generate_mermaid_content( @@ -30,11 +30,16 @@ async fn generate_flowchart_elements(small_files: &Vec, review: &Revie &file, &file_lines_del_map, &file_lines_add_map, - &mut edges_vec, + &mut edges, ).await; } // Render content string - return None; + let subgraphs_str = subgraph_map.values().map( + |subgraph| subgraph.render_subgraph() + ).collect::>().join("\n"); + let edges_str = edges.render_edges(); + let content_str = format!("{}\n{}", &subgraphs_str, &edges_str); + return Some(content_str); } async fn generate_mermaid_content( From d308fbc970263d01ab30718708c4598834286673 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Mon, 8 Jul 2024 20:58:49 +0530 Subject: [PATCH 08/43] Fix prompt path in docker --- vibi-dpu/src/llm/function_info.rs | 19 +++++++++++-------- vibi-dpu/src/llm/utils.rs | 4 ---- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/llm/function_info.rs index 38a3c64c..2aef3067 100644 --- a/vibi-dpu/src/llm/function_info.rs +++ b/vibi-dpu/src/llm/function_info.rs @@ -22,7 +22,7 @@ impl FunctionLineMap { } pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> Option> { - let system_prompt_opt = read_file("/app/prompt_function_lines"); + let system_prompt_opt = read_file("/app/prompts/prompt_function_lines"); if system_prompt_opt.is_none() { log::error!("[mermaid_comment] Unable to read system prompt"); return None; @@ -107,7 +107,7 @@ pub async fn extract_function_calls(hunk_lines: &Vec<(usize, usize)>, numbered_c let user_prompt = get_specific_lines( hunk_lines.to_owned(), numbered_content); // prepare prompt and call llm api - let system_prompt_opt = read_file("/app/prompt_function_calls"); + let system_prompt_opt = read_file("/app/prompts/prompt_function_calls"); if system_prompt_opt.is_none() { log::error!("[extract_function_calls] Unable to read system prompt /app/prompt_function_calls"); return None; @@ -144,10 +144,10 @@ pub async fn extract_function_calls(hunk_lines: &Vec<(usize, usize)>, numbered_c pub struct CalledFunctionPath { pub path: String, pub function_name: String, - line: u32 + import_line: u32 } pub async fn extract_function_import_path(called_funcs: &Vec, numbered_content: &str, file_name: &str) -> Option> { - let system_prompt_opt = read_file("/app/prompt_function_call_path"); + let system_prompt_opt = read_file("/app/prompts/prompt_function_call_path"); if system_prompt_opt.is_none() { log::error!("[extract_function_calls] Unable to read system prompt /app/prompt_function_calls"); return None; @@ -158,7 +158,7 @@ pub async fn extract_function_import_path(called_funcs: &Vec, nu let numbered_lines: Vec<&str> = numbered_content.lines().collect(); for called_func in called_funcs { // extract hunk lines from numbered content or get it as input - let first_occurence_line_opt = find_first_occurence(&numbered_lines, &called_func.name); + let first_occurence_line_opt = find_first_occurence(&numbered_lines, &called_func.name, called_func.line); if first_occurence_line_opt.is_none() { log::debug!("[extract_function_import_path] No first occurence found for: {}", &called_func.name); continue; @@ -197,9 +197,12 @@ pub async fn extract_function_import_path(called_funcs: &Vec, nu // optional - paginate } -fn find_first_occurence(lines: &Vec<&str>, func_name: &str) -> Option { - for line in lines { - if line.contains(func_name) { +fn find_first_occurence(lines: &Vec<&str>, func_name: &str, hunk_line: usize) -> Option { + for (idx, line) in lines.iter().enumerate() { + if idx+1 > hunk_line { // assumption - import info would be above function use + return None; + } + if idx+1 != hunk_line && line.contains(func_name) { return Some(line.to_owned().to_owned()); } } diff --git a/vibi-dpu/src/llm/utils.rs b/vibi-dpu/src/llm/utils.rs index e8955118..2d16371d 100644 --- a/vibi-dpu/src/llm/utils.rs +++ b/vibi-dpu/src/llm/utils.rs @@ -85,10 +85,6 @@ pub fn read_file(file: &str) -> Option { Some(content) } -pub fn parse_llm_response(llm_response: &str) -> Option { - return None; -} - pub fn get_specific_lines(line_numbers: Vec<(usize, usize)>, numbered_content: &str) -> String { // Split the input content into lines and collect into a vector let lines: Vec<&str> = numbered_content.lines().collect(); From 662fe9c81a09b6a3ff4272042f79279b17bfa818 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Mon, 8 Jul 2024 21:13:29 +0530 Subject: [PATCH 09/43] remove import line assumption --- vibi-dpu/src/llm/function_info.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/llm/function_info.rs index 2aef3067..cbac1f22 100644 --- a/vibi-dpu/src/llm/function_info.rs +++ b/vibi-dpu/src/llm/function_info.rs @@ -199,9 +199,6 @@ pub async fn extract_function_import_path(called_funcs: &Vec, nu fn find_first_occurence(lines: &Vec<&str>, func_name: &str, hunk_line: usize) -> Option { for (idx, line) in lines.iter().enumerate() { - if idx+1 > hunk_line { // assumption - import info would be above function use - return None; - } if idx+1 != hunk_line && line.contains(func_name) { return Some(line.to_owned().to_owned()); } From 46475dd8fe563bde3a93dc988962986a14ecc47d Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Tue, 9 Jul 2024 01:41:38 +0530 Subject: [PATCH 10/43] fix response parsing for flinemap --- vibi-dpu/src/llm/function_info.rs | 33 ++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/llm/function_info.rs index cbac1f22..bf88df0b 100644 --- a/vibi-dpu/src/llm/function_info.rs +++ b/vibi-dpu/src/llm/function_info.rs @@ -2,6 +2,11 @@ use serde::{Deserialize, Serialize}; use super::utils::{call_llm_api, get_specific_lines, read_file}; +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFunctionLineMapResponse { + functions: Vec +} + #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionLineMap { pub name: String, @@ -52,18 +57,23 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> return None; } Some(llm_response) => { + let mut unparsed_res = llm_response; // parse response to FunctionLineMap - let flinemap_res = serde_json::from_str(&llm_response); + if unparsed_res.contains("```json") { + unparsed_res = extract_json_from_llm_response(&unparsed_res); + } + let flinemap_res = serde_json::from_str(&unparsed_res); + log::debug!("[extract_function_lines] flinemap_res {:?} ", &flinemap_res); if flinemap_res.is_err() { let e = flinemap_res.expect_err("Empty error in flinemap_res"); log::error!( "[extract_function_lines] Unable to deserialize llm response: {:?}, error - {:?}", - &llm_response, e); + &unparsed_res, e); continue; } - let flinemap = flinemap_res.expect("Uncaught error in flinemap_res"); + let flinemapresp: LlmFunctionLineMapResponse = flinemap_res.expect("Uncaught error in flinemap_res"); // add to vec - flines.push(flinemap); + flines.extend(flinemapresp.functions); } } } @@ -75,7 +85,20 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> return Some(parsed_flines); } +fn extract_json_from_llm_response(llm_response: &str) -> String { + let start_delim = "```json"; + let end_delim = "```"; + // Find the starting index of the JSON part + let start_index = llm_response.find(start_delim).expect("find operation failed for ```json"); + // Find the ending index of the JSON part + let end_index = llm_response[start_index + start_delim.len()..].find(end_delim).expect("find for ``` failed"); + + // Extract the JSON part + llm_response[start_index + start_delim.len()..start_index + start_delim.len() + end_index].trim().to_string() +} + fn process_flinemap_response(flines: &Vec) -> Vec { + log::debug!("[process_flinemap_response] flines = {:?}", &flines); let mut resolved_flines = vec![]; let mut unfinished_function = FunctionLineMap::new("", 0, 0, ""); for flinemap in flines { @@ -92,7 +115,7 @@ fn process_flinemap_response(flines: &Vec) -> Vec Date: Tue, 9 Jul 2024 06:30:42 +0530 Subject: [PATCH 11/43] misc fixes to comment generation and git parsing --- vibi-dpu/src/llm/elements.rs | 2 +- vibi-dpu/src/llm/function_info.rs | 54 +++++++++++++++++---------- vibi-dpu/src/llm/gitops.rs | 5 ++- vibi-dpu/src/llm/mermaid_elements.rs | 55 +++++++++++++++++----------- vibi-dpu/src/llm/utils.rs | 10 +++-- 5 files changed, 79 insertions(+), 47 deletions(-) diff --git a/vibi-dpu/src/llm/elements.rs b/vibi-dpu/src/llm/elements.rs index 06ae3440..2b5a321a 100644 --- a/vibi-dpu/src/llm/elements.rs +++ b/vibi-dpu/src/llm/elements.rs @@ -37,7 +37,7 @@ impl MermaidSubgraph { all_nodes.push(node.render_node()); } let subgraph_str = format!( - "\tsubgraph {} [{}]\n{}\t\tend\n", + "\tsubgraph {} [{}]\n{}\nend\n", self.mermaid_id, self.name, all_nodes.join("\n") diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/llm/function_info.rs index bf88df0b..c9fda682 100644 --- a/vibi-dpu/src/llm/function_info.rs +++ b/vibi-dpu/src/llm/function_info.rs @@ -4,7 +4,7 @@ use super::utils::{call_llm_api, get_specific_lines, read_file}; #[derive(Debug, Serialize, Default, Deserialize, Clone)] struct LlmFunctionLineMapResponse { - functions: Vec + functions: Option> } #[derive(Debug, Serialize, Default, Deserialize, Clone)] @@ -73,7 +73,9 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> } let flinemapresp: LlmFunctionLineMapResponse = flinemap_res.expect("Uncaught error in flinemap_res"); // add to vec - flines.extend(flinemapresp.functions); + if flinemapresp.functions.is_some() { + flines.extend(flinemapresp.functions.expect("Empty functions")); + } } } } @@ -99,17 +101,12 @@ fn extract_json_from_llm_response(llm_response: &str) -> String { fn process_flinemap_response(flines: &Vec) -> Vec { log::debug!("[process_flinemap_response] flines = {:?}", &flines); - let mut resolved_flines = vec![]; - let mut unfinished_function = FunctionLineMap::new("", 0, 0, ""); + let mut resolved_flines: Vec = vec![]; for flinemap in flines { - if flinemap.line_end == -1 { - unfinished_function = flinemap.clone(); - continue; - } if flinemap.name == "unknown" { - if unfinished_function.line_end == -1 { - unfinished_function.line_end = flinemap.line_start; - resolved_flines.push(unfinished_function.clone()); + if !resolved_flines.is_empty() { + let fline_len = resolved_flines.len(); + resolved_flines[fline_len - 1].line_end = flinemap.line_end; continue; } } @@ -118,6 +115,10 @@ fn process_flinemap_response(flines: &Vec) -> Vec> +} #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct CalledFunction { @@ -148,27 +149,37 @@ pub async fn extract_function_calls(hunk_lines: &Vec<(usize, usize)>, numbered_c return None; } Some(llm_response) => { - // parse response and return CalledFunction Vec // optional - paginate - let called_functions_res = serde_json::from_str(&llm_response); + let mut unparsed_res = llm_response; + // parse response to FunctionLineMap + if unparsed_res.contains("```json") { + unparsed_res = extract_json_from_llm_response(&unparsed_res); + } + let called_functions_res = serde_json::from_str(&unparsed_res); if called_functions_res.is_err() { let e = called_functions_res.expect_err("Empty error in called_functions_res"); log::error!( "[extract_function_calls] Unable to deserialize called_functions: {:?}", e); return None; } - let called_functions: Vec = called_functions_res.expect("Uncaught error in called_functions_res"); - return Some(called_functions); + let called_func_response: LlmCalledFunctionResponse = called_functions_res.expect("Uncaught error in called_functions_res"); + return called_func_response.functions; } } } +#[derive(Debug, Default, Deserialize, Clone)] +struct LlmCalledFunctionPathResponse { + functions: Option> +} + #[derive(Debug, Default, Deserialize, Clone)] pub struct CalledFunctionPath { - pub path: String, + pub import_path: String, pub function_name: String, import_line: u32 } + pub async fn extract_function_import_path(called_funcs: &Vec, numbered_content: &str, file_name: &str) -> Option> { let system_prompt_opt = read_file("/app/prompts/prompt_function_call_path"); if system_prompt_opt.is_none() { @@ -206,15 +217,20 @@ pub async fn extract_function_import_path(called_funcs: &Vec, nu return None; } Some(llm_response) => { - let called_functions_res = serde_json::from_str(&llm_response); + let mut unparsed_res = llm_response; + // parse response to FunctionLineMap + if unparsed_res.contains("```json") { + unparsed_res = extract_json_from_llm_response(&unparsed_res); + } + let called_functions_res = serde_json::from_str(&unparsed_res); if called_functions_res.is_err() { let e = called_functions_res.expect_err("Empty error in called_functions_res"); log::error!( "[extract_function_calls] Unable to deserialize called_functions: {:?}", e); return None; } - let called_func_paths: Vec = called_functions_res.expect("Uncaught error in called_functions_res"); - return Some(called_func_paths); + let called_func_paths_res: LlmCalledFunctionPathResponse = called_functions_res.expect("Uncaught error in called_functions_res"); + return called_func_paths_res.functions; } } // optional - paginate diff --git a/vibi-dpu/src/llm/gitops.rs b/vibi-dpu/src/llm/gitops.rs index 71f90cbe..9ee2dc30 100644 --- a/vibi-dpu/src/llm/gitops.rs +++ b/vibi-dpu/src/llm/gitops.rs @@ -71,7 +71,7 @@ pub fn get_changed_files(small_files: &Vec, review: &Review) -> (HashM del_hunks_map.insert(filepath.to_string(), del_hunks); } } - (add_hunks_map, del_hunks_map) + (del_hunks_map, add_hunks_map) } fn parse_hunk_range(hunk: &str) -> Option<(usize, usize)> { @@ -84,6 +84,9 @@ fn parse_hunk_range(hunk: &str) -> Option<(usize, usize)> { } } else if parts.len() == 2 { if let (Ok(start), Ok(len)) = (parts[0].parse::(), parts[1].parse::()) { + if len == 0 { + return None; + } return Some((start, len)); } } diff --git a/vibi-dpu/src/llm/mermaid_elements.rs b/vibi-dpu/src/llm/mermaid_elements.rs index 5c23fdf3..05b4ebe6 100644 --- a/vibi-dpu/src/llm/mermaid_elements.rs +++ b/vibi-dpu/src/llm/mermaid_elements.rs @@ -12,7 +12,7 @@ pub async fn generate_mermaid_flowchart(small_files: &Vec, review: &Re } let flowchart_content = flowchart_content_res.expect("Empty flowchart_content_res"); let flowchart_str = format!( - "%%{{init: {{\"flowchart\": {{\"htmlLabels\": false}}}} }}%%\nflowchart LR{}\n", + "%%{{init: {{\"flowchart\": {{\"htmlLabels\": false}}}} }}%%\nflowchart LR\n{}\n", &flowchart_content ); return Some(flowchart_str); @@ -63,19 +63,26 @@ async fn generate_mermaid_content( let numbered_content = file_contents .lines() .enumerate() - .map(|(index, line)| format!("{} {}", index + 1, line)) + .map(|(index, line)| format!("{} {}", index, line)) .collect::>() .join("\n"); - let flinemap_opt = extract_function_lines( - &numbered_content, - file - ).await; - if flinemap_opt.is_none() { - log::debug!( - "[generate_mermaid_content] Unable to generate function line map for file: {}", file); - return; - } - let flinemap = flinemap_opt.expect("Empty flinemap_opt"); + // let flinemap_opt = extract_function_lines( + // &numbered_content, + // file + // ).await; + // if flinemap_opt.is_none() { + // log::debug!( + // "[generate_mermaid_content] Unable to generate function line map for file: {}", file); + // return; + // } + // let flinemap = flinemap_opt.expect("Empty flinemap_opt"); + let flinemap = vec![ + FunctionLineMap::new("unknown", -1, 30, "devprofiler/src/main.rs"), + FunctionLineMap::new("UserAlias", 34, 36, "devprofiler/src/main.rs"), + FunctionLineMap::new("process_repos", 38, 67, "devprofiler/src/main.rs"), + FunctionLineMap::new("process_aliases", 78, 116, "devprofiler/src/main.rs"), + FunctionLineMap::new("main", 119, 195, "devprofiler/src/main.rs"), + ]; // deleted lines let called_info_del_opt = generate_called_function_info( file_lines_del_map, &numbered_content, file).await; @@ -97,17 +104,17 @@ async fn generate_mermaid_content( edges, "red"); // added lines - let called_info_del_opt = generate_called_function_info( + let called_info_add_opt = generate_called_function_info( file_lines_add_map, &numbered_content, file).await; - if called_info_del_opt.is_none() { + if called_info_add_opt.is_none() { log::error!("[generate_mermaid_content] Unable to generate called functions info"); return; } - let (called_funcs_add, called_func_paths_add) = called_info_del_opt.expect("Empty called_info_opt"); + let (called_funcs_add, called_func_paths_add) = called_info_add_opt.expect("Empty called_info_opt"); generate_callee_nodes(&called_func_paths_add, subgraph_map); generate_caller_elements( subgraph_map, - &file_lines_del_map[file], + &file_lines_add_map[file], &flinemap, &called_funcs_add, &called_func_paths_add, @@ -143,7 +150,7 @@ fn generate_caller_elements(subgraph_map: &HashMap, edges.add_edge(MermaidEdge::new( cf.line, caller_node.to_owned(), - subgraph_map[&cfp.path].nodes()[&cf.name].to_owned(), + subgraph_map[&cfp.import_path].nodes()[&cf.name].to_owned(), color.to_string() )); } @@ -153,7 +160,7 @@ fn generate_caller_elements(subgraph_map: &HashMap, fn get_func_from_line(line: usize, flinemaps: &[FunctionLineMap]) -> Option { for flinemap in flinemaps { - if flinemap.line_start >= line as i32 && flinemap.line_end <= line as i32 { + if flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32 { return Some(flinemap.name.to_string()); } } @@ -165,7 +172,7 @@ fn generate_callee_nodes( subgraph_map: &mut HashMap) { for cfp in called_funcs_path { - if let Some(subgraph) = subgraph_map.get_mut(&cfp.path) { + if let Some(subgraph) = subgraph_map.get_mut(&cfp.import_path) { subgraph.add_node( MermaidNode::new(cfp.function_name.to_string()) ); @@ -176,10 +183,10 @@ fn generate_callee_nodes( let mut node_map = HashMap::::new(); node_map.insert(cfp.function_name.to_string(), MermaidNode::new(cfp.function_name.to_string())); let subgraph = MermaidSubgraph::new( - cfp.path.to_string(), + cfp.import_path.to_string(), node_map ); - subgraph_map.insert(cfp.path.to_string(), subgraph); + subgraph_map.insert(cfp.import_path.to_string(), subgraph); } } return; @@ -190,6 +197,10 @@ async fn generate_called_function_info(file_lines_map: &HashMap Option<(Vec, Vec)> { + if !file_lines_map.contains_key(filename) { + log::error!("[generate_called_function_info] Unable to find file: {} in map", &filename); + return None; + } let del_lines = &file_lines_map[filename]; let called_funcs_opt = extract_function_calls( del_lines, @@ -207,7 +218,7 @@ async fn generate_called_function_info(file_lines_map: &HashMap Option { start = end + 1; } - log::debug!("[call_llm_api] chunks = {:?}", &chunks); for chunk in chunks { let parsed_chunk_res = serde_json::from_str(&chunk); if parsed_chunk_res.is_err() { @@ -89,9 +88,13 @@ pub fn get_specific_lines(line_numbers: Vec<(usize, usize)>, numbered_content: & // Split the input content into lines and collect into a vector let lines: Vec<&str> = numbered_content.lines().collect(); let mut result = String::new(); - // Iterate over each line number we are interested in - for (start, end) in line_numbers { + for (mut start, mut end) in line_numbers { + if start > end { + let xchng = start; + start = end; + end = xchng; + } for line_number in start..=end { // Check if the line_number is within the bounds of the vector if line_number < lines.len() { @@ -100,7 +103,6 @@ pub fn get_specific_lines(line_numbers: Vec<(usize, usize)>, numbered_content: & } } } - return result; } From 4f4e26d90ace354a5323c7e3a7b0df57c4cb1c2d Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Wed, 10 Jul 2024 02:27:40 +0530 Subject: [PATCH 12/43] implement git checkout --- vibi-dpu/src/core/review.rs | 3 +- vibi-dpu/src/llm/elements.rs | 8 +- vibi-dpu/src/llm/function_info.rs | 38 +++++-- vibi-dpu/src/llm/mermaid_elements.rs | 159 ++++++++++++++------------- vibi-dpu/src/utils/gitops.rs | 23 ++++ 5 files changed, 145 insertions(+), 86 deletions(-) diff --git a/vibi-dpu/src/core/review.rs b/vibi-dpu/src/core/review.rs index fcff276f..0bf67561 100644 --- a/vibi-dpu/src/core/review.rs +++ b/vibi-dpu/src/core/review.rs @@ -1,4 +1,4 @@ -use std::env; +use std::{env, thread, time::Duration}; use serde_json::Value; @@ -109,6 +109,7 @@ pub async fn commit_check(review: &Review, access_token: &str) { if !commit_exists(&review.base_head_commit(), &review.clone_dir()) || !commit_exists(&review.pr_head_commit(), &review.clone_dir()) { log::info!("Executing git pull on repo {}...", &review.repo_name()); + thread::sleep(Duration::from_secs(1)); git_pull(review, access_token).await; } } diff --git a/vibi-dpu/src/llm/elements.rs b/vibi-dpu/src/llm/elements.rs index 2b5a321a..056f66c0 100644 --- a/vibi-dpu/src/llm/elements.rs +++ b/vibi-dpu/src/llm/elements.rs @@ -28,6 +28,12 @@ impl MermaidSubgraph { } pub fn add_node(&mut self, node: MermaidNode) { + if self.nodes.contains_key(node.function_name()) { + log::error!( + "[add_node] Node already exists: old - {:#?}, new - {:#?}", + &self.nodes[node.function_name()], &node); + return; + } self.nodes.insert(node.function_name.to_string(), node); } @@ -150,7 +156,7 @@ impl MermaidEdge { } } - +#[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct MermaidEdges { all_edges: Vec, } diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/llm/function_info.rs index c9fda682..8fb3d251 100644 --- a/vibi-dpu/src/llm/function_info.rs +++ b/vibi-dpu/src/llm/function_info.rs @@ -62,16 +62,15 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> if unparsed_res.contains("```json") { unparsed_res = extract_json_from_llm_response(&unparsed_res); } - let flinemap_res = serde_json::from_str(&unparsed_res); - log::debug!("[extract_function_lines] flinemap_res {:?} ", &flinemap_res); - if flinemap_res.is_err() { - let e = flinemap_res.expect_err("Empty error in flinemap_res"); + let flinemap_opt = clean_and_deserialize(&unparsed_res); + log::debug!("[extract_function_lines] flinemap_res {:?} ", &flinemap_opt); + if flinemap_opt.is_none() { log::error!( - "[extract_function_lines] Unable to deserialize llm response: {:?}, error - {:?}", - &unparsed_res, e); + "[extract_function_lines] Unable to clean and deserialize llm response: {:?}", + &unparsed_res); continue; } - let flinemapresp: LlmFunctionLineMapResponse = flinemap_res.expect("Uncaught error in flinemap_res"); + let flinemapresp: LlmFunctionLineMapResponse = flinemap_opt.expect("Uncaught error in flinemap_res"); // add to vec if flinemapresp.functions.is_some() { flines.extend(flinemapresp.functions.expect("Empty functions")); @@ -83,10 +82,24 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> log::error!("[extract_function_lines] No functions extracted"); return None; } - let parsed_flines = process_flinemap_response(&flines); + let parsed_flines = process_flinemap_response(&flines, lines.len()); return Some(parsed_flines); } +fn clean_and_deserialize(json_str: &str) -> Option { + let mut cleaned_str = json_str.to_string(); + while !cleaned_str.is_empty() { + match serde_json::from_str(&cleaned_str) { + Ok(parsed) => return Some(parsed), + Err(e) if e.to_string().contains("trailing characters") => { + cleaned_str.pop(); // Remove the last character and try again + } + Err(e) => return None, + } + } + None +} + fn extract_json_from_llm_response(llm_response: &str) -> String { let start_delim = "```json"; let end_delim = "```"; @@ -99,7 +112,7 @@ fn extract_json_from_llm_response(llm_response: &str) -> String { llm_response[start_index + start_delim.len()..start_index + start_delim.len() + end_index].trim().to_string() } -fn process_flinemap_response(flines: &Vec) -> Vec { +fn process_flinemap_response(flines: &Vec, total_lines: usize) -> Vec { log::debug!("[process_flinemap_response] flines = {:?}", &flines); let mut resolved_flines: Vec = vec![]; for flinemap in flines { @@ -112,9 +125,16 @@ fn process_flinemap_response(flines: &Vec) -> Vec> diff --git a/vibi-dpu/src/llm/mermaid_elements.rs b/vibi-dpu/src/llm/mermaid_elements.rs index 05b4ebe6..8b1f792c 100644 --- a/vibi-dpu/src/llm/mermaid_elements.rs +++ b/vibi-dpu/src/llm/mermaid_elements.rs @@ -1,6 +1,6 @@ -use std::collections::HashMap; +use std::{borrow::BorrowMut, collections::HashMap}; -use crate::utils::{gitops::StatItem, review::Review}; +use crate::utils::{gitops::{git_checkout_commit, StatItem}, review::Review}; use super::{elements::{MermaidEdge, MermaidEdges, MermaidNode, MermaidSubgraph}, function_info::{extract_function_calls, extract_function_import_path, extract_function_lines, CalledFunction, CalledFunctionPath, FunctionLineMap}, gitops::get_changed_files, utils::read_file}; @@ -23,16 +23,32 @@ async fn generate_flowchart_elements(small_files: &Vec, review: &Revie let mut subgraph_map = HashMap::::new(); let mut edges = MermaidEdges::new(Vec::::new()); let files: Vec = small_files.iter().map(|item| item.filepath.clone()).collect(); - for file in files { - generate_mermaid_content( - &mut subgraph_map, - review, - &file, - &file_lines_del_map, - &file_lines_add_map, - &mut edges, - ).await; + for file in files.iter() { + if file_lines_add_map.contains_key(file) { + generate_mermaid_content( + &mut subgraph_map, + review, + file, + &file_lines_add_map, + &mut edges, + "green" + ).await; + } + } + git_checkout_commit(review, review.base_head_commit()); + for file in files.iter() { + if file_lines_del_map.contains_key(file) { + generate_mermaid_content( + &mut subgraph_map, + review, + file, + &file_lines_del_map, + &mut edges, + "red" + ).await; + } } + log::debug!("[generate_flowchart_elements] subgraph_map = {:#?}", &subgraph_map); // Render content string let subgraphs_str = subgraph_map.values().map( |subgraph| subgraph.render_subgraph() @@ -44,9 +60,9 @@ async fn generate_flowchart_elements(small_files: &Vec, review: &Revie async fn generate_mermaid_content( subgraph_map: &mut HashMap, review: &Review, file: &str, - file_lines_del_map: &HashMap>, - file_lines_add_map: &HashMap>, - edges: &mut MermaidEdges + file_lines_map: &HashMap>, + edges: &mut MermaidEdges, + color: &str ) { if !file.ends_with(".rs") { log::debug!("[mermaid_comment] File extension not valid: {}", &file); @@ -66,73 +82,45 @@ async fn generate_mermaid_content( .map(|(index, line)| format!("{} {}", index, line)) .collect::>() .join("\n"); - // let flinemap_opt = extract_function_lines( - // &numbered_content, - // file - // ).await; - // if flinemap_opt.is_none() { - // log::debug!( - // "[generate_mermaid_content] Unable to generate function line map for file: {}", file); - // return; - // } - // let flinemap = flinemap_opt.expect("Empty flinemap_opt"); - let flinemap = vec![ - FunctionLineMap::new("unknown", -1, 30, "devprofiler/src/main.rs"), - FunctionLineMap::new("UserAlias", 34, 36, "devprofiler/src/main.rs"), - FunctionLineMap::new("process_repos", 38, 67, "devprofiler/src/main.rs"), - FunctionLineMap::new("process_aliases", 78, 116, "devprofiler/src/main.rs"), - FunctionLineMap::new("main", 119, 195, "devprofiler/src/main.rs"), - ]; + let flinemap_opt = extract_function_lines( + &numbered_content, + file + ).await; + if flinemap_opt.is_none() { + log::debug!( + "[generate_mermaid_content] Unable to generate function line map for file: {}", file); + return; + } + let flinemap = flinemap_opt.expect("Empty flinemap_opt"); // deleted lines let called_info_del_opt = generate_called_function_info( - file_lines_del_map, &numbered_content, file).await; + file_lines_map, &numbered_content, file).await; if called_info_del_opt.is_none() { log::error!("[generate_mermaid_content] Unable to generate called functions info"); return; } let (called_funcs_del, called_func_paths_del) = called_info_del_opt.expect("Empty called_info_opt"); generate_callee_nodes(&called_func_paths_del, subgraph_map); - let file_subgraph = MermaidSubgraph::new( - file.to_string(), HashMap::::new()); generate_caller_elements( subgraph_map, - &file_lines_del_map[file], + &file_lines_map[file], &flinemap, &called_funcs_del, &called_func_paths_del, - &file_subgraph, edges, - "red"); - // added lines - let called_info_add_opt = generate_called_function_info( - file_lines_add_map, &numbered_content, file).await; - if called_info_add_opt.is_none() { - log::error!("[generate_mermaid_content] Unable to generate called functions info"); - return; - } - let (called_funcs_add, called_func_paths_add) = called_info_add_opt.expect("Empty called_info_opt"); - generate_callee_nodes(&called_func_paths_add, subgraph_map); - generate_caller_elements( - subgraph_map, - &file_lines_add_map[file], - &flinemap, - &called_funcs_add, - &called_func_paths_add, - &file_subgraph, - edges, - "green"); - subgraph_map.insert(file.to_string(), file_subgraph); + &file, + color); return; } -fn generate_caller_elements(subgraph_map: &HashMap, +fn generate_caller_elements(subgraph_map: &mut HashMap, hunk_lines: &Vec<(usize, usize)>, flinemap: &Vec, called_funcs: &Vec, called_funcs_path: &Vec, - file_subgraph: &MermaidSubgraph, - edges: &mut MermaidEdges, - color: &str) + edges: &mut MermaidEdges, + filename: &str, + color: &str) { for cf in called_funcs { let func_name_opt = get_func_from_line(cf.line, flinemap); @@ -141,26 +129,51 @@ fn generate_caller_elements(subgraph_map: &HashMap, continue; } let func_name = func_name_opt.expect("Empty func_name_opt"); - let caller_node = match file_subgraph.nodes().get(&func_name) { - Some(node) => node.to_owned(), - None => MermaidNode::new(func_name.clone()) - }; + let caller_node; + + // Borrow subgraph_map mutably to either retrieve or insert the subgraph + let subgraph = subgraph_map.entry(filename.to_string()).or_insert_with(|| { + MermaidSubgraph::new(filename.to_string(), HashMap::new()) + }); + + // Borrow subgraph mutably to either retrieve or insert the node + if let Some(node) = subgraph.nodes().get(&func_name) { + caller_node = node.to_owned(); + } else { + caller_node = MermaidNode::new(func_name.clone()); + subgraph.add_node(caller_node.clone()); + } + + log::debug!("[generate_caller_elements] subgraph_map = {:#?}", subgraph_map); + for cfp in called_funcs_path { if cf.name == cfp.function_name { - edges.add_edge(MermaidEdge::new( - cf.line, - caller_node.to_owned(), - subgraph_map[&cfp.import_path].nodes()[&cf.name].to_owned(), - color.to_string() - )); + // Ensure we do not have an immutable borrow of subgraph_map while we borrow it immutably here + if let Some(import_subgraph) = subgraph_map.get(&cfp.import_path) { + if let Some(called_node) = import_subgraph.nodes().get(&cf.name) { + edges.add_edge(MermaidEdge::new( + cf.line, + caller_node.clone(), + called_node.to_owned(), + color.to_string() + )); + } + } } - } + } + log::debug!("[generate_caller_elements] edges = {:#?}", &edges); } } + fn get_func_from_line(line: usize, flinemaps: &[FunctionLineMap]) -> Option { for flinemap in flinemaps { + log::debug!("[get_func_from_line] flinemap = {:#?}, line: {}", &flinemap, line); + log::debug!( + "[get_func_from_line] condition = {:?}", + (flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32)); if flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32 { + log::debug!("[get_func_from_line] inside if"); return Some(flinemap.name.to_string()); } } @@ -197,10 +210,6 @@ async fn generate_called_function_info(file_lines_map: &HashMap Option<(Vec, Vec)> { - if !file_lines_map.contains_key(filename) { - log::error!("[generate_called_function_info] Unable to find file: {} in map", &filename); - return None; - } let del_lines = &file_lines_map[filename]; let called_funcs_opt = extract_function_calls( del_lines, diff --git a/vibi-dpu/src/utils/gitops.rs b/vibi-dpu/src/utils/gitops.rs index a2411dee..63083686 100644 --- a/vibi-dpu/src/utils/gitops.rs +++ b/vibi-dpu/src/utils/gitops.rs @@ -89,6 +89,29 @@ pub async fn git_pull(review: &Review, access_token: &str) { }; } +pub fn git_checkout_commit(review: &Review, commit_id: &str) { + let directory = review.clone_dir(); + let output_res = Command::new("git") + .arg("checkout") + .arg(commit_id) + .current_dir(directory) + .output(); + if output_res.is_err() { + let e = output_res.expect_err("No error in output_res"); + log::error!("[git_pull] failed to execute git pull: {:?}", e); + return; + } + let output = output_res.expect("Uncaught error in output_res"); + match str::from_utf8(&output.stderr) { + Ok(v) => log::debug!("[git_pull] git pull stderr = {:?}", v), + Err(e) => {/* error handling */ log::error!("[git_pull] git pull stderr error {}", e)}, + }; + match str::from_utf8(&output.stdout) { + Ok(v) => log::debug!("[git_pull] git pull stdout = {:?}", v), + Err(e) => {/* error handling */ log::error!("[git_pull] git pull stdout error {}", e)}, + }; +} + fn set_git_url(git_url: &str, directory: &str, access_token: &str, repo_provider: &str) { let clone_url_opt = create_clone_url(git_url, access_token, repo_provider); if clone_url_opt.is_none(){ From 683710d41394ca4ffde318968c581de8e6e2839e Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Thu, 11 Jul 2024 02:16:51 +0530 Subject: [PATCH 13/43] dummy whitespace commit --- vibi-dpu/src/utils/relevance.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vibi-dpu/src/utils/relevance.rs b/vibi-dpu/src/utils/relevance.rs index 52d97101..9fa6c6a8 100644 --- a/vibi-dpu/src/utils/relevance.rs +++ b/vibi-dpu/src/utils/relevance.rs @@ -48,4 +48,4 @@ impl Relevance { pub fn handles(&self) -> &Option> { &self.handles } -} +} \ No newline at end of file From 36529f337cf6a2c35cd2da267f00d2f71be9fc71 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sat, 20 Jul 2024 12:26:13 +0530 Subject: [PATCH 14/43] Add previous state in function line extraction --- vibi-dpu/src/llm/function_info.rs | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/llm/function_info.rs index 8fb3d251..bd4fedbf 100644 --- a/vibi-dpu/src/llm/function_info.rs +++ b/vibi-dpu/src/llm/function_info.rs @@ -40,14 +40,21 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> // Determine the batch size let batch_size = 30; - + let mut prev_state: Option = None; // Iterate over the lines in chunks of batch_size for chunk in lines.chunks(batch_size) { // create prompt // call llm api + let mut prev_state_str = "{}".to_string(); + if prev_state.is_some() { + if let Ok(res_str) = serde_json::to_string(&prev_state) { + prev_state_str = res_str; + } + } let prompt = format!( - "{}\n\n### User Message\nInput -\n{}\n{}\n\nOutput -", + "{}\n\n### User Message\nInput -\nprev_state ={}\n{}\n{}\n\nOutput -", system_prompt, + prev_state_str, file_name, chunk.join("\n") ); @@ -73,7 +80,15 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> let flinemapresp: LlmFunctionLineMapResponse = flinemap_opt.expect("Uncaught error in flinemap_res"); // add to vec if flinemapresp.functions.is_some() { - flines.extend(flinemapresp.functions.expect("Empty functions")); + let functions_arr = flinemapresp.functions.expect("Empty functions"); + if !functions_arr.is_empty() { + if let Some(func_obj) = functions_arr.last() { + if func_obj.line_end == -1 { + prev_state = Some(func_obj.clone()); + } + } + flines.extend(functions_arr); + } } } } From c9fe29edd55f1ed31b6a401981e2d6b2bec3bdeb Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Fri, 26 Jul 2024 23:00:22 +0530 Subject: [PATCH 15/43] re-implement getting function line range --- vibi-dpu/src/llm/function_line_range.rs | 220 ++++++++++++++++++++++++ vibi-dpu/src/llm/mermaid_elements.rs | 9 +- vibi-dpu/src/llm/mod.rs | 3 +- 3 files changed, 230 insertions(+), 2 deletions(-) create mode 100644 vibi-dpu/src/llm/function_line_range.rs diff --git a/vibi-dpu/src/llm/function_line_range.rs b/vibi-dpu/src/llm/function_line_range.rs new file mode 100644 index 00000000..f4da2e65 --- /dev/null +++ b/vibi-dpu/src/llm/function_line_range.rs @@ -0,0 +1,220 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use crate::utils::review::Review; + +use super::utils::{call_llm_api, read_file}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct FuncDefInfo { + name: String, + line_start: usize, + line_end: usize, + parent: String, +} +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct FunctionFileMap { + file_name: String, + functions: Vec + // implement a function which takes in starting and ending line numbers of a continous range + // and returns the functions inside the range like Vec of ((start_line, end_line) function_name) +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct AllFileFunctions { + func_map: HashMap // file name will be key +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncDefInput { + language: String, + chunk: String +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncDefRequest { + input: LlmFuncDefInput +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncDef { + name: String, + line_num: usize, + parent: String +} +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncDefResponse { + functions: Vec +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncBoundaryInput { + language: String, + func_declared: String, + chunk: String + +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncBoundaryRequest { + input: LlmFuncBoundaryInput +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncBoundaryResponse { + function_boundary: i32 +} + +pub async fn generate_function_map(review: &Review) -> Option { + let dir = review.clone_dir(); + let mut all_file_functions = AllFileFunctions { func_map: HashMap::new() }; + let system_prompt_opt = read_file("/app/prompts/prompt_function_lines"); + if system_prompt_opt.is_none() { + log::error!("[mermaid_comment] Unable to read system prompt"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let system_prompt_end_opt = read_file("/app/prompts/prompt_function_lines_end"); + if system_prompt_end_opt.is_none() { + log::error!("[mermaid_comment] Unable to read system prompt"); + return None; + } + let system_prompt_lines_end = system_prompt_end_opt.expect("Empty system_prompt"); + let entries_res = std::fs::read_dir(dir); + if entries_res.is_err() { + let e = entries_res.expect_err("Empty error in entry_res"); + log::error!( + "[generate_function_map] Error reading dir: {} error = {:?}", dir, e); + return None; + } + let entries = entries_res.expect("Empty error in entry_res"); + for entry_res in entries { + if entry_res.is_err() { + let e = entry_res.expect_err("Empty error in entry_res"); + log::error!( + "[generate_function_map] Error reading, skipping directory entry, error = {:?}", e); + continue; + } + let entry = entry_res.expect("Empty entry_res"); + let path = entry.path(); + if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { + let content = std::fs::read_to_string(path.clone()).ok()?; + let mut function_map = FunctionFileMap { + file_name: path.to_str().unwrap().to_string(), + functions: Vec::new(), + }; + + // Divide content into chunks of 30 lines + let lines: Vec<&str> = content.lines().collect(); + // TODO - convert lines to numbered content + let chunks = lines.chunks(50); + + for chunk in chunks { + let chunk_str = chunk.join("\n"); + let function_defs_opt = get_function_defs_in_chunk(&chunk_str, &system_prompt_lines_end).await; + if function_defs_opt.is_none() { + log::error!("[get_function_defs_in_chunk] Unable to get functions from llm"); + continue; + } + let function_defs = function_defs_opt.expect("Empty function_defs"); + for func_def in function_defs.functions.iter() { + let func_boundary_opt = get_function_boundaries_in_chunk(&lines, func_def.line_num, &system_prompt_lines_end).await; + if func_boundary_opt.is_none() { + continue; + } + let func_boundary = func_boundary_opt.expect("Empty func_boundary_opt"); + function_map.functions.push(FuncDefInfo { + name: func_def.name.clone(), + line_start: func_def.line_num, + line_end: func_boundary.function_boundary as usize, + parent: func_def.parent.clone(), + }); + } + } + all_file_functions.func_map.insert(path.to_str().unwrap().to_string(), function_map); + } + } + return Some(all_file_functions); +} + +async fn get_function_defs_in_chunk(chunk: &str, system_prompt: &str) -> Option { + let llm_req = LlmFuncDefRequest { + input: LlmFuncDefInput { + language: "rust".to_string(), + chunk: chunk.to_string() + } + }; + let llm_req_res = serde_json::to_string(&llm_req); + if llm_req_res.is_err() { + log::error!("[get_function_defs_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + return None; + } + let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); + match call_llm_api(prompt).await { + None => { + log::error!("[mermaid_comment] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + let funcdefs_res = serde_json::from_str(&llm_response); + if funcdefs_res.is_err() { + log::error!( + "[get_function_defs_in_chunk] funcdefs error: {}", + funcdefs_res.expect_err("Empty error in funcdefs_res")); + return None; + } + let funcdefs: LlmFuncDefResponse = funcdefs_res.expect("Uncaught error in funcdefs_res"); + return Some(funcdefs); + } + } +} + +async fn get_function_boundaries_in_chunk(file_lines_numbered: &Vec<&str>, func_def_line_num: usize, system_prompt: &str) -> Option { + // divide lines into chunks and call with each chunk until line_end is found or files is empty + let chunk_size = 70; + let mut start = func_def_line_num; + + while start < file_lines_numbered.len() { + let end = std::cmp::min(start + chunk_size, file_lines_numbered.len()); + let chunk: Vec<&str> = file_lines_numbered[start..end].to_vec(); + let chunk_str = chunk.join("\n"); + + let input = LlmFuncBoundaryInput { + language: "rust".to_string(), // Assuming Rust as language, you can modify this as needed + func_declared: file_lines_numbered[func_def_line_num].to_string(), + chunk: chunk_str, + }; + let llm_req = LlmFuncBoundaryRequest { input }; + let llm_req_res = serde_json::to_string(&llm_req); + if llm_req_res.is_err() { + log::error!("[get_function_defs_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + return None; + } + let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); + match call_llm_api(prompt).await { + None => { + log::error!("[mermaid_comment] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + let func_resp_res = serde_json::from_str(&llm_response); + if func_resp_res.is_err() { + let e = func_resp_res.expect_err("Empty error func_resp_res"); + log::error!("[get_function_boundaries_in_chunk] Unable to deserialize response"); + return None; + } + let func_resp: LlmFuncBoundaryResponse = func_resp_res.expect("Uncaught error in func_resp_res"); + if func_resp.function_boundary == -1 { + start += chunk_size; + continue; + } + return Some(func_resp); + } + } + } + + return None; +} \ No newline at end of file diff --git a/vibi-dpu/src/llm/mermaid_elements.rs b/vibi-dpu/src/llm/mermaid_elements.rs index 8b1f792c..4b0c23fb 100644 --- a/vibi-dpu/src/llm/mermaid_elements.rs +++ b/vibi-dpu/src/llm/mermaid_elements.rs @@ -2,9 +2,16 @@ use std::{borrow::BorrowMut, collections::HashMap}; use crate::utils::{gitops::{git_checkout_commit, StatItem}, review::Review}; -use super::{elements::{MermaidEdge, MermaidEdges, MermaidNode, MermaidSubgraph}, function_info::{extract_function_calls, extract_function_import_path, extract_function_lines, CalledFunction, CalledFunctionPath, FunctionLineMap}, gitops::get_changed_files, utils::read_file}; +use super::{elements::{MermaidEdge, MermaidEdges, MermaidNode, MermaidSubgraph}, function_info::{extract_function_calls, extract_function_import_path, extract_function_lines, CalledFunction, CalledFunctionPath, FunctionLineMap}, function_line_range::generate_function_map, gitops::get_changed_files, utils::read_file}; pub async fn generate_mermaid_flowchart(small_files: &Vec, review: &Review) -> Option { + let function_map_opt = generate_function_map(review).await; + if function_map_opt.is_none() { + log::error!("[generate_mermaid_flowchart] Unable to generate function map"); + return None; + } + let function_map = function_map_opt.expect("Empty function_map_opt"); + log::debug!("[generate_mermaid_flowchart] func map = {:?}", &function_map); let flowchart_content_res = generate_flowchart_elements(small_files, review).await; if flowchart_content_res.is_none() { log::error!("[generate_mermaid_flowchart] Unable to generate flowchart content, review: {}", review.id()); diff --git a/vibi-dpu/src/llm/mod.rs b/vibi-dpu/src/llm/mod.rs index d816a721..c9f75f47 100644 --- a/vibi-dpu/src/llm/mod.rs +++ b/vibi-dpu/src/llm/mod.rs @@ -2,4 +2,5 @@ pub mod utils; pub mod gitops; pub mod function_info; pub mod mermaid_elements; -pub mod elements; \ No newline at end of file +pub mod elements; +pub mod function_line_range; \ No newline at end of file From e6f3ad96a3fe6b332d9605ab8fdba5688a190f0c Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sun, 28 Jul 2024 15:40:20 +0530 Subject: [PATCH 16/43] fix directory traversal and implement numbered content --- vibi-dpu/Cargo.toml | 2 +- vibi-dpu/src/llm/function_line_range.rs | 125 ++++++++++++------------ 2 files changed, 63 insertions(+), 64 deletions(-) diff --git a/vibi-dpu/Cargo.toml b/vibi-dpu/Cargo.toml index 76634418..a021f9bc 100644 --- a/vibi-dpu/Cargo.toml +++ b/vibi-dpu/Cargo.toml @@ -37,5 +37,5 @@ once_cell = "1.18.0" # MIT jsonwebtoken = "8.3.0" # MIT fern = "0.6.2" # MIT log = "0.4.20" # MIT/Apache2 - +walkdir = "2.5.0" # Unlicence/MIT # todo - check all lib licences diff --git a/vibi-dpu/src/llm/function_line_range.rs b/vibi-dpu/src/llm/function_line_range.rs index f4da2e65..c0dd4a35 100644 --- a/vibi-dpu/src/llm/function_line_range.rs +++ b/vibi-dpu/src/llm/function_line_range.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; +use walkdir::WalkDir; use crate::utils::review::Review; @@ -71,48 +72,38 @@ pub async fn generate_function_map(review: &Review) -> Option let mut all_file_functions = AllFileFunctions { func_map: HashMap::new() }; let system_prompt_opt = read_file("/app/prompts/prompt_function_lines"); if system_prompt_opt.is_none() { - log::error!("[mermaid_comment] Unable to read system prompt"); + log::error!("[mermaid_comment] Unable to read prompt_function_lines"); return None; } let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); - let system_prompt_end_opt = read_file("/app/prompts/prompt_function_lines_end"); + let system_prompt_end_opt = read_file("/app/prompts/prompt_function_boundary"); if system_prompt_end_opt.is_none() { - log::error!("[mermaid_comment] Unable to read system prompt"); + log::error!("[mermaid_comment] Unable to read prompt_function_boundary"); return None; } let system_prompt_lines_end = system_prompt_end_opt.expect("Empty system_prompt"); - let entries_res = std::fs::read_dir(dir); - if entries_res.is_err() { - let e = entries_res.expect_err("Empty error in entry_res"); - log::error!( - "[generate_function_map] Error reading dir: {} error = {:?}", dir, e); - return None; - } - let entries = entries_res.expect("Empty error in entry_res"); - for entry_res in entries { - if entry_res.is_err() { - let e = entry_res.expect_err("Empty error in entry_res"); - log::error!( - "[generate_function_map] Error reading, skipping directory entry, error = {:?}", e); - continue; - } - let entry = entry_res.expect("Empty entry_res"); + for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) { let path = entry.path(); + log::debug!("[generate_function_map] path = {:?}", path); + let ext = path.extension().and_then(|ext| ext.to_str()); + log::debug!("[generate_function_map] extension = {:?}", &ext); if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { - let content = std::fs::read_to_string(path.clone()).ok()?; + let file_contents = std::fs::read_to_string(path).ok()?; + let lines = file_contents + .lines() + .enumerate() + .map(|(index, line)| format!("{} {}", index+1, line)) + .collect::>(); let mut function_map = FunctionFileMap { file_name: path.to_str().unwrap().to_string(), functions: Vec::new(), }; - - // Divide content into chunks of 30 lines - let lines: Vec<&str> = content.lines().collect(); // TODO - convert lines to numbered content let chunks = lines.chunks(50); for chunk in chunks { let chunk_str = chunk.join("\n"); - let function_defs_opt = get_function_defs_in_chunk(&chunk_str, &system_prompt_lines_end).await; + let function_defs_opt = get_function_defs_in_chunk(&chunk_str, &system_prompt_lines).await; if function_defs_opt.is_none() { log::error!("[get_function_defs_in_chunk] Unable to get functions from llm"); continue; @@ -132,6 +123,7 @@ pub async fn generate_function_map(review: &Review) -> Option }); } } + log::debug!("[generate_function_map] func_map = {:#?}", &function_map); all_file_functions.func_map.insert(path.to_str().unwrap().to_string(), function_map); } } @@ -152,33 +144,35 @@ async fn get_function_defs_in_chunk(chunk: &str, system_prompt: &str) -> Option< } let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); - match call_llm_api(prompt).await { - None => { - log::error!("[mermaid_comment] Failed to call LLM API"); - return None; - } - Some(llm_response) => { - let funcdefs_res = serde_json::from_str(&llm_response); - if funcdefs_res.is_err() { - log::error!( - "[get_function_defs_in_chunk] funcdefs error: {}", - funcdefs_res.expect_err("Empty error in funcdefs_res")); - return None; - } - let funcdefs: LlmFuncDefResponse = funcdefs_res.expect("Uncaught error in funcdefs_res"); - return Some(funcdefs); - } - } + // match call_llm_api(prompt).await { + // None => { + // log::error!("[mermaid_comment] Failed to call LLM API"); + // return None; + // } + // Some(llm_response) => { + // // let funcdefs_res = serde_json::from_str(&llm_response); + // // if funcdefs_res.is_err() { + // // log::error!( + // // "[get_function_defs_in_chunk] funcdefs error: {}", + // // funcdefs_res.expect_err("Empty error in funcdefs_res")); + // // return None; + // // } + // // let funcdefs: LlmFuncDefResponse = funcdefs_res.expect("Uncaught error in funcdefs_res"); + // return Some(funcdefs); + // } + // } + let funcdefs = LlmFuncDefResponse{ functions: vec![LlmFuncDef{ name: "main".to_string(), line_num: 18, parent: "".to_string() }] }; + return Some(funcdefs); } -async fn get_function_boundaries_in_chunk(file_lines_numbered: &Vec<&str>, func_def_line_num: usize, system_prompt: &str) -> Option { +async fn get_function_boundaries_in_chunk(file_lines_numbered: &Vec, func_def_line_num: usize, system_prompt: &str) -> Option { // divide lines into chunks and call with each chunk until line_end is found or files is empty let chunk_size = 70; let mut start = func_def_line_num; while start < file_lines_numbered.len() { let end = std::cmp::min(start + chunk_size, file_lines_numbered.len()); - let chunk: Vec<&str> = file_lines_numbered[start..end].to_vec(); + let chunk: Vec = file_lines_numbered[start..end].to_vec(); let chunk_str = chunk.join("\n"); let input = LlmFuncBoundaryInput { @@ -194,27 +188,32 @@ async fn get_function_boundaries_in_chunk(file_lines_numbered: &Vec<&str>, func_ } let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); - match call_llm_api(prompt).await { - None => { - log::error!("[mermaid_comment] Failed to call LLM API"); - return None; - } - Some(llm_response) => { - let func_resp_res = serde_json::from_str(&llm_response); - if func_resp_res.is_err() { - let e = func_resp_res.expect_err("Empty error func_resp_res"); - log::error!("[get_function_boundaries_in_chunk] Unable to deserialize response"); - return None; - } - let func_resp: LlmFuncBoundaryResponse = func_resp_res.expect("Uncaught error in func_resp_res"); - if func_resp.function_boundary == -1 { - start += chunk_size; - continue; - } - return Some(func_resp); - } + // match call_llm_api(prompt).await { + // None => { + // log::error!("[mermaid_comment] Failed to call LLM API"); + // return None; + // } + // Some(llm_response) => { + // let func_resp_res = serde_json::from_str(&llm_response); + // if func_resp_res.is_err() { + // let e = func_resp_res.expect_err("Empty error func_resp_res"); + // log::error!("[get_function_boundaries_in_chunk] Unable to deserialize response"); + // return None; + // } + // let func_resp: LlmFuncBoundaryResponse = func_resp_res.expect("Uncaught error in func_resp_res"); + // if func_resp.function_boundary == -1 { + // start += chunk_size; + // continue; + // } + // return Some(func_resp); + // } + // } + let func_resp = LlmFuncBoundaryResponse { function_boundary: 79 }; + if func_resp.function_boundary == -1 { + start += chunk_size; + continue; } + return Some(func_resp); } - return None; } \ No newline at end of file From 8bb7eb20609a4356aa13839a4a45df1c9d8518eb Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Tue, 13 Aug 2024 01:49:17 +0530 Subject: [PATCH 17/43] Implement edge rendering and diff info generation --- vibi-dpu/src/core/relevance.rs | 14 +- vibi-dpu/src/db/graph_info.rs | 45 +++ vibi-dpu/src/db/mod.rs | 3 +- vibi-dpu/src/db/review.rs | 2 +- vibi-dpu/src/graph/elements.rs | 216 +++++++++++++++ vibi-dpu/src/graph/file_imports.rs | 256 ++++++++++++++++++ vibi-dpu/src/{llm => graph}/function_info.rs | 13 +- vibi-dpu/src/graph/function_line_range.rs | 235 ++++++++++++++++ vibi-dpu/src/{llm => graph}/gitops.rs | 0 vibi-dpu/src/graph/graph_edges.rs | 19 ++ vibi-dpu/src/graph/graph_info.rs | 207 ++++++++++++++ .../src/{llm => graph}/mermaid_elements.rs | 50 +++- vibi-dpu/src/graph/mod.rs | 8 + vibi-dpu/src/{llm => graph}/utils.rs | 52 +++- vibi-dpu/src/llm/elements.rs | 193 ------------- vibi-dpu/src/llm/function_line_range.rs | 219 --------------- vibi-dpu/src/llm/mod.rs | 6 - vibi-dpu/src/main.rs | 2 +- vibi-dpu/src/utils/gitops.rs | 3 +- 19 files changed, 1092 insertions(+), 451 deletions(-) create mode 100644 vibi-dpu/src/db/graph_info.rs create mode 100644 vibi-dpu/src/graph/elements.rs create mode 100644 vibi-dpu/src/graph/file_imports.rs rename vibi-dpu/src/{llm => graph}/function_info.rs (94%) create mode 100644 vibi-dpu/src/graph/function_line_range.rs rename vibi-dpu/src/{llm => graph}/gitops.rs (100%) create mode 100644 vibi-dpu/src/graph/graph_edges.rs create mode 100644 vibi-dpu/src/graph/graph_info.rs rename vibi-dpu/src/{llm => graph}/mermaid_elements.rs (82%) create mode 100644 vibi-dpu/src/graph/mod.rs rename vibi-dpu/src/{llm => graph}/utils.rs (68%) delete mode 100644 vibi-dpu/src/llm/elements.rs delete mode 100644 vibi-dpu/src/llm/function_line_range.rs delete mode 100644 vibi-dpu/src/llm/mod.rs diff --git a/vibi-dpu/src/core/relevance.rs b/vibi-dpu/src/core/relevance.rs index d0206fb0..dab7d9f9 100644 --- a/vibi-dpu/src/core/relevance.rs +++ b/vibi-dpu/src/core/relevance.rs @@ -1,6 +1,6 @@ use std::collections::{HashMap, HashSet}; -use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, llm::mermaid_elements::generate_mermaid_flowchart, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; +use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, graph::mermaid_elements::generate_mermaid_flowchart, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; use crate::utils::review::Review; use crate::utils::repo_config::RepoConfig; @@ -226,16 +226,20 @@ async fn comment_text(relevance_vec: &Vec, auto_assign: bool, comment += "If you are a relevant reviewer, you can use the [Vibinex browser extension](https://chromewebstore.google.com/detail/vibinex-code-review/jafgelpkkkopeaefadkdjcmnicgpcncc) to see parts of the PR relevant to you\n"; // Added a newline at the end comment += "Relevance of the reviewer is calculated based on the git blame information of the PR. To know more, hit us up at contact@vibinex.com.\n\n"; // Added two newlines comment += "To change comment and auto-assign settings, go to [your Vibinex settings page.](https://vibinex.com/u)\n"; // Added a newline at the end - - if let Some(mermaid_text) = mermaid_comment(small_files, review).await { + let all_diff_files: Vec = excluded_files + .iter() + .chain(small_files.iter()) + .cloned() // Clone the StatItem instances since `iter` returns references + .collect(); // Collect into a new vector + if let Some(mermaid_text) = mermaid_comment(&all_diff_files, review).await { comment += mermaid_text.as_str(); } return comment; } -pub async fn mermaid_comment(small_files: &Vec, review: &Review) -> Option { - let flowchart_str_opt = generate_mermaid_flowchart(small_files, review).await; +pub async fn mermaid_comment(diff_files: &Vec, review: &Review) -> Option { + let flowchart_str_opt = generate_mermaid_flowchart(diff_files, review).await; if flowchart_str_opt.is_none() { log::error!("[mermaid_comment] Unable to generate flowchart for review: {}", review.id()); return None; diff --git a/vibi-dpu/src/db/graph_info.rs b/vibi-dpu/src/db/graph_info.rs new file mode 100644 index 00000000..0e0d1d2c --- /dev/null +++ b/vibi-dpu/src/db/graph_info.rs @@ -0,0 +1,45 @@ +use sled::IVec; + +use crate::{db::config::get_db, graph::graph_info::GraphInfo}; +pub fn save_graph_info_to_db(review_key: &str, commit_id: &str, graph_info: &GraphInfo) { + let db = get_db(); + let graph_info_key = format!("graph_info/{}/{}", review_key, commit_id); + // Serialize repo struct to JSON + let json = serde_json::to_vec(graph_info).expect("Failed to serialize review"); + // Insert JSON into sled DB + let insert_res = db.insert(IVec::from(graph_info_key.as_bytes()), json); + if insert_res.is_err() { + let e = insert_res.expect_err("No error in insert_res"); + log::error!("[save_graph_info_to_db] Failed to upsert graph info into sled DB: {e}"); + return; + } + log::debug!("[save_graph_info_to_db] Graph Info succesfully upserted: {:#?}", graph_info); +} + +pub fn get_graph_info_from_db(review_key: &str, commit_id: &str) -> Option { + let db = get_db(); + let graph_info_key = format!("graph_info/{}/{}", review_key, commit_id); + let graph_info_res = db.get(IVec::from(graph_info_key.as_bytes())); + if let Err(e) = graph_info_res { + log::error!("[get_graph_info_from_db] GraphInfo key not found in db - {}, error: {:?}", + &graph_info_key, e); + return None; + } + let ivec_opt = graph_info_res.expect("Uncaught error in graph_info_res"); + log::debug!("[get_graph_info_from_db] ivec_opt: {:?}", ivec_opt); + if ivec_opt.is_none() { + log::error!("[get_graph_info_from_db] No graph info found for {}/{}", review_key, commit_id); + return None; + } + let ivec = ivec_opt.expect("Empty ivec_opt"); + let graph_info_res = serde_json::from_slice(&ivec); + if let Err(e) = graph_info_res { + log::error!( + "[get_graph_info_from_db] Failed to deserialize review from json: {:?}", + e + ); + return None; + } + let graph_info: GraphInfo = graph_info_res.expect("Uncaught error in graph_info_res"); + return Some(graph_info); +} \ No newline at end of file diff --git a/vibi-dpu/src/db/mod.rs b/vibi-dpu/src/db/mod.rs index dff5cbd4..10e5bbd6 100644 --- a/vibi-dpu/src/db/mod.rs +++ b/vibi-dpu/src/db/mod.rs @@ -9,4 +9,5 @@ pub mod repo_config; pub mod prs; pub mod bitbucket; pub mod github; -pub mod aliases; \ No newline at end of file +pub mod aliases; +pub mod graph_info; \ No newline at end of file diff --git a/vibi-dpu/src/db/review.rs b/vibi-dpu/src/db/review.rs index 1d8ce0c9..a1d2909f 100644 --- a/vibi-dpu/src/db/review.rs +++ b/vibi-dpu/src/db/review.rs @@ -38,7 +38,7 @@ pub fn get_review_from_db(repo_name: &str, repo_owner: &str, let review_res = serde_json::from_slice(&ivec); if let Err(e) = review_res { log::error!( - "[get_handles_from_db] Failed to deserialize review from json: {:?}", + "[get_review_from_db] Failed to deserialize review from json: {:?}", e ); return None; diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs new file mode 100644 index 00000000..0a6b5e11 --- /dev/null +++ b/vibi-dpu/src/graph/elements.rs @@ -0,0 +1,216 @@ +use std::{borrow::Borrow, cell::{Ref, RefCell}, collections::HashMap, rc::Rc}; +use serde::{Serialize, Deserialize}; + +use super::utils::generate_random_string; + +#[derive(Debug, Default, Clone)] +pub struct MermaidSubgraph { + name: String, + nodes: HashMap>>, + mermaid_id: String +} + +impl MermaidSubgraph { + // Constructor + pub fn new(name: String) -> Self { + let mermaid_id = generate_random_string(4); + Self { name, nodes: HashMap::new(), mermaid_id } + } + + // Getter for nodes + pub fn nodes(&self) -> &HashMap>> { + self.nodes.borrow() + } + + pub fn mermaid_id(&self) -> &String { + &self.mermaid_id + } + + // Setter for nodes + pub fn set_nodes(&mut self, nodes: HashMap>>) { + self.nodes = nodes; + } + + pub fn add_node(&mut self, node: Rc>) { + let function_name = { + let node_borrowed: Ref = RefCell::borrow(&*node); + node_borrowed.function_name().to_string() + }; + if self.nodes.contains_key(&function_name) { + log::error!( + "[add_node] Node already exists: old - {:#?}, new - {:#?}", + &self.nodes[&function_name], node); + return; + } + self.nodes.insert(function_name, node); + } + + pub fn render_subgraph(&self) -> String{ + let mut all_nodes = Vec::new(); + for (_, node) in self.nodes() { + let node_borrowed = RefCell::borrow(&*node); + all_nodes.push(node_borrowed.render_node()); + } + let subgraph_str = format!( + "\tsubgraph {} [{}]\n{}\nend\n", + self.mermaid_id, + self.name, + all_nodes.join("\n") + ); + // self.set_subgraph_str(Some(subgraph_str)); + return subgraph_str; + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct MermaidNode { + function_name: String, + mermaid_id: String, +} + +impl MermaidNode { + // Constructor + pub fn new( function_name: String) -> Self { + let mermaid_id = generate_random_string(4); + Self { mermaid_id, function_name } + } + + // Getter for function_name + pub fn function_name(&self) -> &String { + &self.function_name + } + + // Getter for mermaid_id + pub fn mermaid_id(&self) -> &String { + &self.mermaid_id + } + + // Setter for function_name + pub fn set_function_name(&mut self, function_name: String) { + self.function_name = function_name; + } + + pub fn render_node(&self) -> String { + let node_str = format!("\t{}[{}]", &self.mermaid_id, &self.function_name); + // self.set_node_str(Some(node_str.clone())); + return node_str; + } +} + +#[derive(Debug, Default, Clone)] +pub struct MermaidEdge { + line: usize, + caller_function: Rc>, + called_function: Rc>, + color: String, +} + +impl MermaidEdge { + // Constructor + pub fn new(line: usize, caller_function: &Rc>, called_function: &Rc>, color: String) -> Self { + Self { + line, + caller_function: Rc::clone(caller_function), + called_function: Rc::clone(called_function), + color } + } + + // Getter for edge_str + pub fn line(&self) -> usize { + self.line + } + + // Getter for color + pub fn color(&self) -> &String { + &self.color + } + + // Setter for color + pub fn set_color(&mut self, color: String) { + self.color = color; + } + + pub fn add_edge_and_nodes(&mut self) { + // add edge and source and destination nodes + } + + pub fn render_edge_definition(&self) -> String { + let (caller_str, called_str) = { + let caller_borrowed: Ref = RefCell::borrow(&*self.caller_function); + let called_borrowed: Ref = RefCell::borrow(&*self.called_function); + (caller_borrowed.function_name().to_string(), called_borrowed.function_name().to_string()) + }; + let edge_str = format!( + "\t{} -- Line {} --> {}\n", + caller_str, + self.line, + called_str, + ); + return edge_str; + } + + pub fn render_edge_style(&self) -> String { + let style_str = format!( + "stroke:{},stroke-width:4px;", + self.color() + ); + return style_str; + } +} + +#[derive(Debug, Default, Clone)] +pub struct MermaidGraphElements { + edges: Vec, + subgraphs: HashMap, +} + +impl MermaidGraphElements { + pub fn new() -> Self { + Self { + edges: Vec::new(), + subgraphs: HashMap::new(), + } + } + + pub fn add_edge(&mut self, edge: MermaidEdge, from_subgraph: &MermaidSubgraph, to_subgraph: &MermaidSubgraph) { + self.edges.push(edge); + self.add_subgraph(from_subgraph); + self.add_subgraph(to_subgraph); + } + + fn add_subgraph(&mut self, subgraph: &MermaidSubgraph) { + if !self.subgraphs.contains_key(subgraph.mermaid_id()) { + self.subgraphs.insert(subgraph.mermaid_id().to_string(), + subgraph.to_owned()); + } + } + + fn render_edges(&self) -> String { + let mut all_edges = Vec::::new(); + let mut all_edges_style = Vec::::new(); + for (idx, edge) in self.edges.iter().enumerate() { + all_edges.push(edge.render_edge_definition()); + all_edges_style.push( + format!("\tlinkStyle {} {}", idx, edge.render_edge_style()) + ); + } + let all_edges_str = format!( + "{}{}", + all_edges.join("\n"), + all_edges_style.join("\n") + ); + return all_edges_str; + } + + fn render_subgraphs(&self) -> String { + return self.subgraphs.values().map( + |subgraph| subgraph.render_subgraph() + ).collect::>().join("\n"); + } + + pub fn render_elements(&self) -> String { + let all_elements_str = format!("{}\n{}", + &self.render_subgraphs(), &self.render_edges()); + return all_elements_str; + } +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/file_imports.rs b/vibi-dpu/src/graph/file_imports.rs new file mode 100644 index 00000000..9984751d --- /dev/null +++ b/vibi-dpu/src/graph/file_imports.rs @@ -0,0 +1,256 @@ +use std::{collections::HashMap, path::PathBuf}; + +use serde::{Deserialize, Serialize}; + +use crate::{graph::utils::numbered_content, utils::review::Review}; + +use super::utils::{all_code_files, call_llm_api, read_file}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmImportLineInput { + language: String, + file_path: String, + chunk: String +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmImportLineRequest { + input: LlmImportLineInput +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FileImportLines { + lines: Vec +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmImportPathInput { + language: String, + file_path: String, + import_lines: String +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmImportPathRequest { + input: LlmImportPathInput +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct ImportPath { + import_line: String, + import_path: String, + imported: String +} + +impl PartialEq for ImportPath { + fn eq(&self, other: &Self) -> bool { + self.import_line == other.import_line && self.import_path == other.import_path && self.imported == other.imported + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct ChunkImportInfo { + import_lines: FileImportLines, + import_paths: Vec +} + +impl ChunkImportInfo { + pub fn import_paths(&self) -> &Vec { + &self.import_paths + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FileImportInfo { + import_chunk_info: Vec, + filepath: String +} + +impl FileImportInfo { + pub fn all_import_paths(&self) -> Vec { + let all_paths: Vec = self.import_chunk_info + .iter() + .flat_map(|chunk| chunk.import_paths()) + .cloned() + .collect(); + return all_paths; + } + + pub fn filepath(&self) -> &String { + &self.filepath + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct AllFileImportInfo { + file_import_map: HashMap +} + +impl AllFileImportInfo { + pub fn files(&self) -> Vec<&String> { + self.file_import_map.keys().collect() + } + + pub fn is_import_in_file(&self, filename: &str, import_path: &ImportPath) -> bool { + self.file_import_map[filename].all_import_paths().contains(import_path) + } + + pub fn file_import_info(&self, filename: &str) -> Option<&FileImportInfo> { + self.file_import_map.get(filename) + } +} + +pub async fn get_import_lines(file_paths: &Vec) -> Option { + let mut all_import_info = HashMap::::new(); + let system_prompt_opt = read_file("/app/prompts/prompt_import_lines"); + if system_prompt_opt.is_none() { + log::error!("[get_import_lines] Unable to read prompt_import_lines"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let system_prompt_path_opt = read_file("/app/prompts/prompt_import_path"); + if system_prompt_path_opt.is_none() { + log::error!("[get_import_lines] Unable to read prompt_import_path"); + return None; + } + let system_prompt_path = system_prompt_path_opt.expect("Empty system_prompt"); + for path in file_paths { + log::debug!("[get_import_lines] path = {:?}", path); + let file_contents = std::fs::read_to_string(path.clone()).ok()?; + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(50); + let path_str = path.to_str().expect("Empty path"); + let mut chunks_import_vec = Vec::::new(); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + let import_lines_opt = get_import_lines_chunk( + &system_prompt_lines, &chunk_str, + path_str).await; + if import_lines_opt.is_none() { + log::error!("[get_import_lines] Skipping chunk, unable to get import lines"); + continue; + } + let import_lines_chunk = import_lines_opt.expect("Empty func_boundary_opt"); + if let Some(import_paths) = get_import_path_file(&numbered_content, + import_lines_chunk.clone(), &system_prompt_path, path_str).await { + let chunk_import_info = ChunkImportInfo { import_lines: import_lines_chunk, import_paths }; + chunks_import_vec.push(chunk_import_info); + } + } + let import_info = FileImportInfo { + import_chunk_info: chunks_import_vec, filepath: path_str.to_string() }; + all_import_info.insert(path_str.to_string(), import_info); + } + if all_import_info.is_empty() { + return None; + } + return Some(AllFileImportInfo { file_import_map: all_import_info }); +} + +async fn get_import_lines_chunk(system_prompt_lines: &str, chunk_str: &str, file_path: &str) -> Option { + let llm_req = LlmImportLineRequest { input: + LlmImportLineInput { + language: "rust".to_string(), + file_path: file_path.to_string(), + chunk: chunk_str.to_string() } }; + let llm_req_res = serde_json::to_string(&llm_req); + if llm_req_res.is_err() { + log::error!("[get_function_defs_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + return None; + } + let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", + system_prompt_lines, llm_req_prompt); + match call_llm_api(prompt).await { + None => { + log::error!("[get_import_lines_chunk] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + let import_res = serde_json::from_str(&llm_response); + if import_res.is_err() { + log::error!( + "[get_import_lines_chunk] funcdefs error: {}", + import_res.expect_err("Empty error in funcdefs_res")); + return None; + } + let import_lines_file: FileImportLines = import_res.expect("Uncaught error in funcdefs_res"); + return Some(import_lines_file); + } + } +} + +async fn get_import_path_file(numbered_content: &Vec, import_line: FileImportLines, system_prompt: &str, file_path: &str) -> Option> { + let mut import_paths = Vec::::new(); + // get import lines from numbered lines + let import_lines_str_opt = numbered_import_lines(numbered_content, import_line); + if import_lines_str_opt.is_none() { + log::error!("[get_import_path_file] Unable to get numbered import line"); + return None; + } + let import_lines_str_chunks = import_lines_str_opt.expect("Empty import_lines_str_opt"); + for import_lines_chunk in import_lines_str_chunks { + let llm_req = LlmImportPathRequest{ + input: LlmImportPathInput { + language: "rust".to_string(), + file_path: file_path.to_string(), + import_lines: import_lines_chunk + } + }; + let llm_req_res = serde_json::to_string(&llm_req); + if llm_req_res.is_err() { + log::error!("[get_import_path_file] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + return None; + } + let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", + system_prompt, llm_req_prompt); + match call_llm_api(prompt).await { + None => { + log::error!("[get_import_path_file] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + let import_res = serde_json::from_str(&llm_response); + if import_res.is_err() { + log::error!( + "[get_import_path_file] funcdefs error: {}", + import_res.expect_err("Empty error in funcdefs_res")); + continue; + } + let import_path: ImportPath = import_res.expect("Uncaught error in funcdefs_res"); + import_paths.push(import_path); + } + } + } + if import_paths.is_empty() { + return None; + } + return Some(import_paths); +} + +fn numbered_import_lines(numbered_content: &Vec, import_line: FileImportLines) -> Option>{ + let mut chunks = Vec::new(); + let mut chunk = String::new(); + let mut line_count = 0; + + for line in import_line.lines { + if line_count == 30 { + chunks.push(chunk.clone()); + chunk = String::new(); + line_count = 0; + } + chunk += &numbered_content[line as usize]; + line_count += 1; + } + + // Push the last chunk if it's not empty + if !chunk.is_empty() { + chunks.push(chunk); + } + + if chunks.is_empty() { + return None; + } + Some(chunks) +} \ No newline at end of file diff --git a/vibi-dpu/src/llm/function_info.rs b/vibi-dpu/src/graph/function_info.rs similarity index 94% rename from vibi-dpu/src/llm/function_info.rs rename to vibi-dpu/src/graph/function_info.rs index bd4fedbf..9ccc0dcf 100644 --- a/vibi-dpu/src/llm/function_info.rs +++ b/vibi-dpu/src/graph/function_info.rs @@ -42,7 +42,7 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> let batch_size = 30; let mut prev_state: Option = None; // Iterate over the lines in chunks of batch_size - for chunk in lines.chunks(batch_size) { + for (chunk_idx, chunk) in lines.chunks(batch_size).enumerate() { // create prompt // call llm api let mut prev_state_str = "{}".to_string(); @@ -52,12 +52,13 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> } } let prompt = format!( - "{}\n\n### User Message\nInput -\nprev_state ={}\n{}\n{}\n\nOutput -", + "{}\n\n### User Message\nInput -\nprev_state = {}\n{}\n{}\n\nOutput -", system_prompt, - prev_state_str, + &prev_state_str, file_name, chunk.join("\n") ); + log::debug!("[extract_function_lines] prev_state_str = {}", &prev_state_str); match call_llm_api(prompt).await { None => { log::error!("[mermaid_comment] Failed to call LLM API"); @@ -83,7 +84,11 @@ pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> let functions_arr = flinemapresp.functions.expect("Empty functions"); if !functions_arr.is_empty() { if let Some(func_obj) = functions_arr.last() { - if func_obj.line_end == -1 { + let last_line_chunk = ((batch_size * (chunk_idx + 1)) - 1) as i32; + log::debug!( + "[extract_function_lines] last_line_chunk = {}, func_obj.line_end = {} ", + last_line_chunk, func_obj.line_end); + if func_obj.line_end == last_line_chunk { prev_state = Some(func_obj.clone()); } } diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs new file mode 100644 index 00000000..38299035 --- /dev/null +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -0,0 +1,235 @@ +use std::{collections::HashMap, path::PathBuf}; + +use serde::{Deserialize, Serialize}; +use walkdir::WalkDir; + +use crate::{graph::utils::numbered_content, utils::review::Review}; + +use super::utils::{all_code_files, call_llm_api, read_file}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FuncDefInfo { + name: String, + line_start: usize, + line_end: usize, + parent: String, +} + +impl PartialEq for FuncDefInfo { + fn eq(&self, other: &Self) -> bool { + self.name == other.name && self.line_start == other.line_start + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FunctionFileMap { + file_name: String, + functions: Vec + // implement a function which takes in starting and ending line numbers of a continous range + // and returns the functions inside the range like Vec of ((start_line, end_line) function_name) +} + +impl FunctionFileMap { + pub fn functions(&self) -> &Vec { + &self.functions + } + + pub fn is_func_in_file(&self, func: &FuncDefInfo) -> bool { + self.functions.contains(func) + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct AllFileFunctions { + func_map: HashMap // file name will be key +} + +impl AllFileFunctions { + + pub fn functions_in_file(&self, filename: &str) -> Option<&FunctionFileMap> { + self.func_map.get(filename) + } + + pub fn all_files(&self) -> Vec<&String> { + self.func_map.keys().collect::>() + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncDefInput { + language: String, + chunk: String +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncDefRequest { + input: LlmFuncDefInput +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncDef { + name: String, + line_num: usize, + parent: String +} +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncDefResponse { + functions: Vec +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncBoundaryInput { + language: String, + func_declared: String, + chunk: String + +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncBoundaryRequest { + input: LlmFuncBoundaryInput +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +struct LlmFuncBoundaryResponse { + function_boundary: i32 +} + +pub async fn generate_function_map(file_paths: &Vec) -> Option { + let mut all_file_functions = AllFileFunctions { func_map: HashMap::new() }; + let system_prompt_opt = read_file("/app/prompts/prompt_function_lines"); + if system_prompt_opt.is_none() { + log::error!("[generate_function_map] Unable to read prompt_function_lines"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let system_prompt_end_opt = read_file("/app/prompts/prompt_function_boundary"); + if system_prompt_end_opt.is_none() { + log::error!("[generate_function_map] Unable to read prompt_function_boundary"); + return None; + } + let system_prompt_lines_end = system_prompt_end_opt.expect("Empty system_prompt"); + for path in file_paths { + log::debug!("[generate_function_map] path = {:?}", path); + let mut function_map = FunctionFileMap { + file_name: path.to_str().to_owned().unwrap_or("").to_string(), + functions: Vec::new(), + }; + let file_contents = std::fs::read_to_string(path.clone()).ok()?; + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(50); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + let function_defs_opt = get_function_defs_in_chunk(&chunk_str, &system_prompt_lines).await; + if function_defs_opt.is_none() { + log::error!("[generate_function_map] Unable to get functions from llm"); + continue; + } + let function_defs = function_defs_opt.expect("Empty function_defs"); + for func_def in function_defs.functions.iter() { + let func_boundary_opt = get_function_boundaries_in_chunk(&numbered_content, func_def.line_num, &system_prompt_lines_end).await; + if func_boundary_opt.is_none() { + continue; + } + let func_boundary = func_boundary_opt.expect("Empty func_boundary_opt"); + function_map.functions.push(FuncDefInfo { + name: func_def.name.clone(), + line_start: func_def.line_num, + line_end: func_boundary.function_boundary as usize, + parent: func_def.parent.clone(), + }); + } + } + log::debug!("[generate_function_map] func_map = {:#?}", &function_map); + all_file_functions.func_map.insert(path.to_str().unwrap().to_string(), function_map); + } + return Some(all_file_functions); +} + +async fn get_function_defs_in_chunk(chunk: &str, system_prompt: &str) -> Option { + let llm_req = LlmFuncDefRequest { + input: LlmFuncDefInput { + language: "rust".to_string(), + chunk: chunk.to_string() + } + }; + let llm_req_res = serde_json::to_string(&llm_req); + if llm_req_res.is_err() { + log::error!("[get_function_defs_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + return None; + } + let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); + match call_llm_api(prompt).await { + None => { + log::error!("[get_function_defs_in_chunk] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + let funcdefs_res = serde_json::from_str(&llm_response); + if funcdefs_res.is_err() { + log::error!( + "[get_function_defs_in_chunk] funcdefs error: {}", + funcdefs_res.expect_err("Empty error in funcdefs_res")); + return None; + } + let funcdefs: LlmFuncDefResponse = funcdefs_res.expect("Uncaught error in funcdefs_res"); + return Some(funcdefs); + } + } + // let funcdefs = LlmFuncDefResponse{ functions: vec![LlmFuncDef{ name: "main".to_string(), line_num: 18, parent: "".to_string() }] }; + // return Some(funcdefs); +} + +async fn get_function_boundaries_in_chunk(file_lines_numbered: &Vec, func_def_line_num: usize, system_prompt: &str) -> Option { + // divide lines into chunks and call with each chunk until line_end is found or files is empty + let chunk_size = 70; + let mut start = func_def_line_num; + + while start < file_lines_numbered.len() { + let end = std::cmp::min(start + chunk_size, file_lines_numbered.len()); + let chunk: Vec = file_lines_numbered[start..end].to_vec(); + let chunk_str = chunk.join("\n"); + + let input = LlmFuncBoundaryInput { + language: "rust".to_string(), // Assuming Rust as language, you can modify this as needed + func_declared: file_lines_numbered[func_def_line_num].to_string(), + chunk: chunk_str, + }; + let llm_req = LlmFuncBoundaryRequest { input }; + let llm_req_res = serde_json::to_string(&llm_req); + if llm_req_res.is_err() { + log::error!("[get_function_boundaries_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + return None; + } + let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); + match call_llm_api(prompt).await { + None => { + log::error!("[get_function_boundaries_in_chunk] Failed to call LLM API"); + return None; + } + Some(llm_response) => { + let func_resp_res = serde_json::from_str(&llm_response); + if func_resp_res.is_err() { + let e = func_resp_res.expect_err("Empty error func_resp_res"); + log::error!("[get_function_boundaries_in_chunk] Unable to deserialize response"); + return None; + } + let func_resp: LlmFuncBoundaryResponse = func_resp_res.expect("Uncaught error in func_resp_res"); + if func_resp.function_boundary == -1 { + start += chunk_size; + continue; + } + return Some(func_resp); + } + } + // let func_resp = LlmFuncBoundaryResponse { function_boundary: 79 }; + // if func_resp.function_boundary == -1 { + // start += chunk_size; + // continue; + // } + // return Some(func_resp); + } + return None; +} \ No newline at end of file diff --git a/vibi-dpu/src/llm/gitops.rs b/vibi-dpu/src/graph/gitops.rs similarity index 100% rename from vibi-dpu/src/llm/gitops.rs rename to vibi-dpu/src/graph/gitops.rs diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs new file mode 100644 index 00000000..167ec78f --- /dev/null +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -0,0 +1,19 @@ +async fn graph_edges() { + let incoming_edges = incoming_edges().await; + let outgoing_edges = outgoing_edges().await; + let graph = edge_nodes().await; +} + +async fn incoming_edges() { + // find incoming edges from full_graph to diff_graph + // find incoming green edges from diff_graph to diff_graph +} + +async fn outgoing_edges() { + // find outgoing edges from diff_graph to full_graph + // find outgoing edges from diff_graph to diff_graph +} + +async fn edge_nodes() { + // render all edges and their nodes +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs new file mode 100644 index 00000000..46871c7a --- /dev/null +++ b/vibi-dpu/src/graph/graph_info.rs @@ -0,0 +1,207 @@ +use std::{collections::HashMap, path::PathBuf}; + +use serde::{Deserialize, Serialize}; + +use crate::{db::graph_info::{get_graph_info_from_db, save_graph_info_to_db}, graph::{file_imports::get_import_lines, function_line_range::generate_function_map, utils::all_code_files}, utils::gitops::StatItem}; + +use super::{file_imports::{AllFileImportInfo, ImportPath}, function_line_range::{AllFileFunctions, FuncDefInfo}, utils::source_diff_files}; + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct DiffInfo { + added_funcs: Option>>, // key is filename + deleted_funcs: Option>>, // key is filename + added_imports: Option>>, // key is filename + deleted_imports: Option>> // key is filename +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct GraphInfo { + function_info: AllFileFunctions, + import_info: AllFileImportInfo +} + +impl GraphInfo { + pub fn function_info(&self) -> &AllFileFunctions { + &self.function_info + } + + pub fn import_info(&self) -> &AllFileImportInfo { + &self.import_info + } +} + +async fn generate_graph_info(source_file_paths: &Vec) -> Option { + let function_map_opt = generate_function_map(source_file_paths).await; + if function_map_opt.is_none() { + log::error!("[generate_graph_info] Unable to generate function map"); + return None; + } + let function_map = function_map_opt.expect("Empty function_map_opt"); + log::debug!("[generate_graph_info] func map = {:?}", &function_map); + let all_file_import_info_opt = get_import_lines(source_file_paths).await; + if all_file_import_info_opt.is_none() { + log::error!("[generate_graph_info] Unable to get import info for source files: {:#?}", source_file_paths); + return None; + } + let all_file_import_info = all_file_import_info_opt.expect("Empty import_lines_opt"); + let graph_info = GraphInfo { function_info: function_map, + import_info: all_file_import_info }; + return Some(graph_info); +} + +pub async fn generate_full_graph(repo_dir: &str, review_key: &str, commit_id: &str) -> Option { + // check for graph db + if let Some(graph_info) = get_graph_info_from_db(review_key, commit_id) { + return Some(graph_info); + } + let repo_code_files_opt = all_code_files(repo_dir); + if repo_code_files_opt.is_none() { + log::error!("[generate_full_graph] Unable to get file paths: {}", repo_dir); + return None; + } + let repo_code_files = repo_code_files_opt.expect("Empty repo_code_files_opt"); + let graph_info_opt = generate_graph_info(&repo_code_files).await; + if graph_info_opt.is_none() { + log::error!("[generate_full_graph] Unable to generate full graph for commit: {}", commit_id); + return None; + } + let graph_info = graph_info_opt.expect("Empty graph_info_opt"); + // save all this to db + save_graph_info_to_db(review_key, commit_id, &graph_info); + return Some(graph_info); +} + +pub async fn generate_diff_graph(diff_files: &Vec) -> Option { + let diff_code_files_opt = source_diff_files(diff_files); + if diff_code_files_opt.is_none() { + log::error!("[generate_diff_graph] Unable to get file paths for: {:#?}", diff_files); + return None; + } + let diff_code_files = diff_code_files_opt.expect("Empty diff_code_files_opt"); + let graph_info_opt = generate_graph_info(&diff_code_files).await; + if graph_info_opt.is_none() { + log::error!("[generate_diff_graph] Unable to generate diff graph"); + return None; + } + let graph_info = graph_info_opt.expect("Empty graph_info_opt"); + return Some(graph_info); +} + +fn added_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { + let mut added_funcs = HashMap::>::new(); + for filename in diff_graph.function_info().all_files() { + let func_map_opt = full_graph.function_info().functions_in_file(filename); + if func_map_opt.is_none() { + if let Some(diff_func_map) = diff_graph.function_info().functions_in_file(filename) { + let funcs_vec = diff_func_map.functions().to_owned(); + added_funcs.entry(filename.to_string()) + .or_insert_with(Vec::new) + .extend(funcs_vec); + } + } else { + let full_func_map = func_map_opt.expect("Empty func_map_opt"); + if let Some(diff_func_map) = diff_graph.function_info().functions_in_file(filename) { + for func in diff_func_map.functions() { + if !full_func_map.is_func_in_file(func) { + added_funcs.entry(filename.to_string()) + .or_insert_with(Vec::new) + .push(func.to_owned()); + } + } + } + } + } + if added_funcs.is_empty() { + return None; + } + return Some(added_funcs); +} + +fn deleted_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { + let mut deleted_funcs = HashMap::>::new(); + for filename in diff_graph.function_info().all_files() { + // TODO - full file deleted? + let funcs_opt = full_graph.function_info().functions_in_file(filename); + if funcs_opt.is_none() { + // file added + } + let full_funcs = funcs_opt.expect("Empty funcs_opt"); + let diff_funcs = diff_graph.function_info().functions_in_file(filename).expect("Empty diff_funcs"); + for func in full_funcs.functions() { + if diff_funcs.is_func_in_file(func) { + deleted_funcs.entry(filename.to_string()) + .or_insert_with(Vec::new) + .push(func.to_owned()); + } + } + } + if deleted_funcs.is_empty() { + return None; + } + return Some(deleted_funcs) +} + +fn added_imports_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { + let mut added_imports = HashMap::>::new(); + for filename in diff_graph.import_info().files() { + let diff_imports = diff_graph + .import_info() + .file_import_info(filename).expect("Empty diff imports"); + let full_imports_opt = full_graph + .import_info().file_import_info(filename); + if full_imports_opt.is_none() { + added_imports.entry(filename.to_string()) + .or_insert_with(Vec::new) + .extend(diff_imports.all_import_paths()); + } else { + for import_path in diff_imports.all_import_paths() { + if !full_graph.import_info().is_import_in_file(filename, &import_path) { + added_imports.entry(filename.to_string()) + .or_insert_with(Vec::new) + .push(import_path); + } + } + } + } + if added_imports.is_empty() { + return None; + } + return Some(added_imports); +} + +fn deleted_imports_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { + let mut deleted_imports = HashMap::>::new(); + // TODO - file deleted + for filename in diff_graph.import_info().files() { + let full_imports_opt = full_graph.import_info().file_import_info(filename); + if full_imports_opt.is_none() { + // file added + } + let full_imports = full_imports_opt.expect("Empty full_imports_opt"); + for import_path in full_imports.all_import_paths() { + if !diff_graph.import_info().is_import_in_file(filename, &import_path) { + deleted_imports.entry(filename.to_string()) + .or_insert_with(Vec::new) + .push(import_path); + } + } + } + if deleted_imports.is_empty() { + return None; + } + return Some(deleted_imports); +} + +pub fn generate_diff_info(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> DiffInfo { + // Get added funcs and imports + let added_funcs_opt = added_functions_diff(full_graph, diff_graph); + let deleted_funcs_opt = deleted_functions_diff(full_graph, diff_graph); + let added_imports_opt = added_imports_diff(full_graph, diff_graph); + let deleted_imports_opt = deleted_imports_diff(full_graph, diff_graph); + return DiffInfo { + added_funcs: added_funcs_opt, + deleted_funcs: deleted_funcs_opt, + added_imports: added_imports_opt, + deleted_imports: deleted_imports_opt + }; +} \ No newline at end of file diff --git a/vibi-dpu/src/llm/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs similarity index 82% rename from vibi-dpu/src/llm/mermaid_elements.rs rename to vibi-dpu/src/graph/mermaid_elements.rs index 4b0c23fb..654f694f 100644 --- a/vibi-dpu/src/llm/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -1,18 +1,14 @@ -use std::{borrow::BorrowMut, collections::HashMap}; +use std::{borrow::{Borrow, BorrowMut}, collections::HashMap}; -use crate::utils::{gitops::{git_checkout_commit, StatItem}, review::Review}; +use crate::{graph::{file_imports::get_import_lines, graph_info::{generate_diff_graph, generate_diff_info, generate_full_graph}}, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; -use super::{elements::{MermaidEdge, MermaidEdges, MermaidNode, MermaidSubgraph}, function_info::{extract_function_calls, extract_function_import_path, extract_function_lines, CalledFunction, CalledFunctionPath, FunctionLineMap}, function_line_range::generate_function_map, gitops::get_changed_files, utils::read_file}; +use super::{elements::{MermaidEdge, MermaidEdges, MermaidNode, MermaidSubgraph}, function_line_range::generate_function_map, gitops::get_changed_files, graph_info::GraphInfo, utils::read_file}; -pub async fn generate_mermaid_flowchart(small_files: &Vec, review: &Review) -> Option { - let function_map_opt = generate_function_map(review).await; - if function_map_opt.is_none() { - log::error!("[generate_mermaid_flowchart] Unable to generate function map"); - return None; - } - let function_map = function_map_opt.expect("Empty function_map_opt"); - log::debug!("[generate_mermaid_flowchart] func map = {:?}", &function_map); - let flowchart_content_res = generate_flowchart_elements(small_files, review).await; +pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Review) -> Option { + + + // generate graph using AllFileFunctions, ImportLines and ImportPath + let flowchart_content_res = generate_flowchart_elements(diff_files, review).await; if flowchart_content_res.is_none() { log::error!("[generate_mermaid_flowchart] Unable to generate flowchart content, review: {}", review.id()); return None; @@ -25,11 +21,35 @@ pub async fn generate_mermaid_flowchart(small_files: &Vec, review: &Re return Some(flowchart_str); } -async fn generate_flowchart_elements(small_files: &Vec, review: &Review) -> Option { - let (file_lines_del_map, file_lines_add_map) = get_changed_files(small_files, review); +async fn generate_flowchart_elements(diff_files: &Vec, review: &Review) -> Option { + // generate full graph for base commit id + git_checkout_commit(review, review.base_head_commit()); + let full_graph_opt = generate_full_graph(&review.clone_dir(), + &review.db_key(), &review.base_head_commit()).await; + if full_graph_opt.is_none() { + log::error!( + "[generate_flowchart_elements] Unable to generate full graph for review: {}", + review.id()); + return None; + } + let full_graph = full_graph_opt.expect("Empty full_graph_opt"); + // generate diff graph for head commit id + git_checkout_commit(review, review.pr_head_commit()); + let diff_graph_opt = generate_diff_graph(diff_files).await; + if diff_graph_opt.is_none() { + log::error!( + "[generate_flowchart_elements] Unable to generate diff graph for review: {}", + review.id()); + return None; + } + let diff_graph = diff_graph_opt.expect("Empty diff_graph_opt"); + let diff_info = generate_diff_info(&full_graph, &diff_graph); + + + let (file_lines_del_map, file_lines_add_map) = get_changed_files(diff_files, review); let mut subgraph_map = HashMap::::new(); let mut edges = MermaidEdges::new(Vec::::new()); - let files: Vec = small_files.iter().map(|item| item.filepath.clone()).collect(); + let files: Vec = diff_files.iter().map(|item| item.filepath.clone()).collect(); for file in files.iter() { if file_lines_add_map.contains_key(file) { generate_mermaid_content( diff --git a/vibi-dpu/src/graph/mod.rs b/vibi-dpu/src/graph/mod.rs new file mode 100644 index 00000000..5df01be1 --- /dev/null +++ b/vibi-dpu/src/graph/mod.rs @@ -0,0 +1,8 @@ +pub mod utils; +pub mod gitops; +pub mod mermaid_elements; +pub mod elements; +pub mod function_line_range; +pub mod file_imports; +pub mod graph_info; +pub mod graph_edges; \ No newline at end of file diff --git a/vibi-dpu/src/llm/utils.rs b/vibi-dpu/src/graph/utils.rs similarity index 68% rename from vibi-dpu/src/llm/utils.rs rename to vibi-dpu/src/graph/utils.rs index 4f3b2746..5ac53436 100644 --- a/vibi-dpu/src/llm/utils.rs +++ b/vibi-dpu/src/graph/utils.rs @@ -1,13 +1,14 @@ -use std::{collections::HashMap, path::Path}; +use std::{collections::HashMap, path::{Path, PathBuf}, slice::Chunks}; use futures_util::StreamExt; use serde::{Deserialize, Serialize}; use serde_json::json; +use walkdir::WalkDir; use std::fs; use rand::Rng; -use crate::utils::reqwest_client::get_client; +use crate::utils::{gitops::StatItem, reqwest_client::get_client, review::Review}; #[derive(Debug, Serialize, Default, Deserialize, Clone)] struct LlmResponse { @@ -19,7 +20,7 @@ struct LlmResponse { pub async fn call_llm_api(prompt: String) -> Option { let client = get_client(); - let url = "http://35.244.9.107/api/generate"; + let url = "http://host.docker.internal:11434/api/generate"; log::debug!("[call_llm_api] Prompt = {:?}", &prompt); let response_res = client.post(url) .json(&json!({"model": "phind-codellama", "prompt": prompt})) @@ -68,11 +69,11 @@ pub async fn call_llm_api(prompt: String) -> Option { } pub fn read_file(file: &str) -> Option { - log::error!("[read_file] file name = {}", &file); + log::debug!("[read_file] file name = {}", &file); let path = Path::new(file); let content_res = fs::read_to_string(path); if !path.exists() { - log::error!("[read_file] Path does not exist: {:?}", &path); + log::error!("[read_file] File does not exist: {:?}", &path); return None; } if content_res.is_err() { @@ -116,4 +117,45 @@ pub fn generate_random_string(length: usize) -> String { }) .collect(); random_string +} + +pub fn all_code_files(dir: &str) -> Option> { + let mut code_files = Vec::::new(); + for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) { + let path = entry.path().to_owned(); + log::debug!("[generate_function_map] path = {:?}", path); + let ext = path.extension().and_then(|ext| ext.to_str()); + log::debug!("[generate_function_map] extension = {:?}", &ext); + if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { + code_files.push(path); + } + } + if code_files.is_empty() { + return None; + } + return Some(code_files); +} + +pub fn source_diff_files(diff_files: &Vec) -> Option> { + let mut code_files = Vec::::new(); + for stat_item in diff_files { + let filepath_str = &stat_item.filepath; + let filepath = Path::new(filepath_str); + if filepath.extension().and_then(|ext| ext.to_str()) == Some("rs") { + code_files.push(filepath.to_path_buf()); + } + } + if code_files.is_empty() { + return None; + } + return Some(code_files); +} + +pub fn numbered_content(file_contents: String) -> Vec { + let lines = file_contents + .lines() + .enumerate() + .map(|(index, line)| format!("{} {}", index+1, line)) + .collect::>(); + return lines; } \ No newline at end of file diff --git a/vibi-dpu/src/llm/elements.rs b/vibi-dpu/src/llm/elements.rs deleted file mode 100644 index 056f66c0..00000000 --- a/vibi-dpu/src/llm/elements.rs +++ /dev/null @@ -1,193 +0,0 @@ -use std::{borrow::BorrowMut, collections::HashMap}; -use serde::{Serialize, Deserialize}; - -use super::utils::generate_random_string; - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct MermaidSubgraph { - name: String, - nodes: HashMap, - mermaid_id: String -} - -impl MermaidSubgraph { - // Constructor - pub fn new(name: String, nodes: HashMap) -> Self { - let mermaid_id = generate_random_string(4); - Self { name, nodes, mermaid_id } - } - - // Getter for nodes - pub fn nodes(&self) -> &HashMap { - &self.nodes - } - - // Setter for nodes - pub fn set_nodes(&mut self, nodes: HashMap) { - self.nodes = nodes; - } - - pub fn add_node(&mut self, node: MermaidNode) { - if self.nodes.contains_key(node.function_name()) { - log::error!( - "[add_node] Node already exists: old - {:#?}, new - {:#?}", - &self.nodes[node.function_name()], &node); - return; - } - self.nodes.insert(node.function_name.to_string(), node); - } - - pub fn render_subgraph(&self) -> String{ - let mut all_nodes = Vec::new(); - for (_, node) in self.nodes() { - all_nodes.push(node.render_node()); - } - let subgraph_str = format!( - "\tsubgraph {} [{}]\n{}\nend\n", - self.mermaid_id, - self.name, - all_nodes.join("\n") - ); - // self.set_subgraph_str(Some(subgraph_str)); - return subgraph_str; - } -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct MermaidNode { - function_name: String, - mermaid_id: String, -} - -impl MermaidNode { - // Constructor - pub fn new( function_name: String) -> Self { - let mermaid_id = generate_random_string(4); - Self { mermaid_id, function_name } - } - - // Getter for function_name - pub fn function_name(&self) -> &String { - &self.function_name - } - - // Getter for mermaid_id - pub fn mermaid_id(&self) -> &String { - &self.mermaid_id - } - - // Setter for function_name - pub fn set_function_name(&mut self, function_name: String) { - self.function_name = function_name; - } - - pub fn render_node(&self) -> String { - let node_str = format!("\t{}[{}]", &self.mermaid_id, &self.function_name); - // self.set_node_str(Some(node_str.clone())); - return node_str; - } -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct MermaidEdge { - line: usize, - caller_function: MermaidNode, - called_function: MermaidNode, - color: String, -} - -impl MermaidEdge { - // Constructor - pub fn new(line: usize, caller_function: MermaidNode, called_function: MermaidNode, color: String) -> Self { - Self { line, caller_function, called_function, color } - } - - // Getter for edge_str - pub fn line(&self) -> usize { - self.line - } - - // Getter for caller_function - pub fn caller_function(&self) -> &MermaidNode { - &self.caller_function - } - - // Setter for caller_function - pub fn set_caller_function(&mut self, caller_function: MermaidNode) { - self.caller_function = caller_function; - } - - // Getter for called_function - pub fn called_function(&self) -> &MermaidNode { - &self.called_function - } - - // Setter for called_function - pub fn set_called_function(&mut self, called_function: MermaidNode) { - self.called_function = called_function; - } - - // Getter for color - pub fn color(&self) -> &String { - &self.color - } - - // Setter for color - pub fn set_color(&mut self, color: String) { - self.color = color; - } - - pub fn render_edge_definition(&self) -> String { - let edge_str = format!( - "\t{} -- Line {} --> {}\n", - self.caller_function().mermaid_id(), - self.line, - self.called_function().mermaid_id(), - ); - return edge_str; - } - - pub fn render_edge_style(&self) -> String { - let style_str = format!( - "stroke:{},stroke-width:4px;", - self.color() - ); - return style_str; - } -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct MermaidEdges { - all_edges: Vec, -} - -impl MermaidEdges { - pub fn new(all_edges: Vec) -> Self { - MermaidEdges {all_edges } - } - - pub fn all_edges(&self) -> &Vec { - return &self.all_edges; - } - - pub fn add_edge(&mut self, edge: MermaidEdge) { - self.all_edges.push(edge); - } - - pub fn render_edges(&self) -> String { - let mut all_edges = Vec::::new(); - let mut all_edges_style = Vec::::new(); - for (idx, edge) in self.all_edges().iter().enumerate() { - all_edges.push(edge.render_edge_definition()); - all_edges_style.push( - format!("\tlinkStyle {} {}", idx, edge.render_edge_style()) - ); - } - let all_edges_str = format!( - "{}{}", - all_edges.join("\n"), - all_edges_style.join("\n") - ); - return all_edges_str; - } -} \ No newline at end of file diff --git a/vibi-dpu/src/llm/function_line_range.rs b/vibi-dpu/src/llm/function_line_range.rs deleted file mode 100644 index c0dd4a35..00000000 --- a/vibi-dpu/src/llm/function_line_range.rs +++ /dev/null @@ -1,219 +0,0 @@ -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; -use walkdir::WalkDir; - -use crate::utils::review::Review; - -use super::utils::{call_llm_api, read_file}; - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct FuncDefInfo { - name: String, - line_start: usize, - line_end: usize, - parent: String, -} -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct FunctionFileMap { - file_name: String, - functions: Vec - // implement a function which takes in starting and ending line numbers of a continous range - // and returns the functions inside the range like Vec of ((start_line, end_line) function_name) -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct AllFileFunctions { - func_map: HashMap // file name will be key -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFuncDefInput { - language: String, - chunk: String -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFuncDefRequest { - input: LlmFuncDefInput -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFuncDef { - name: String, - line_num: usize, - parent: String -} -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFuncDefResponse { - functions: Vec -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFuncBoundaryInput { - language: String, - func_declared: String, - chunk: String - -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFuncBoundaryRequest { - input: LlmFuncBoundaryInput -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFuncBoundaryResponse { - function_boundary: i32 -} - -pub async fn generate_function_map(review: &Review) -> Option { - let dir = review.clone_dir(); - let mut all_file_functions = AllFileFunctions { func_map: HashMap::new() }; - let system_prompt_opt = read_file("/app/prompts/prompt_function_lines"); - if system_prompt_opt.is_none() { - log::error!("[mermaid_comment] Unable to read prompt_function_lines"); - return None; - } - let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); - let system_prompt_end_opt = read_file("/app/prompts/prompt_function_boundary"); - if system_prompt_end_opt.is_none() { - log::error!("[mermaid_comment] Unable to read prompt_function_boundary"); - return None; - } - let system_prompt_lines_end = system_prompt_end_opt.expect("Empty system_prompt"); - for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) { - let path = entry.path(); - log::debug!("[generate_function_map] path = {:?}", path); - let ext = path.extension().and_then(|ext| ext.to_str()); - log::debug!("[generate_function_map] extension = {:?}", &ext); - if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { - let file_contents = std::fs::read_to_string(path).ok()?; - let lines = file_contents - .lines() - .enumerate() - .map(|(index, line)| format!("{} {}", index+1, line)) - .collect::>(); - let mut function_map = FunctionFileMap { - file_name: path.to_str().unwrap().to_string(), - functions: Vec::new(), - }; - // TODO - convert lines to numbered content - let chunks = lines.chunks(50); - - for chunk in chunks { - let chunk_str = chunk.join("\n"); - let function_defs_opt = get_function_defs_in_chunk(&chunk_str, &system_prompt_lines).await; - if function_defs_opt.is_none() { - log::error!("[get_function_defs_in_chunk] Unable to get functions from llm"); - continue; - } - let function_defs = function_defs_opt.expect("Empty function_defs"); - for func_def in function_defs.functions.iter() { - let func_boundary_opt = get_function_boundaries_in_chunk(&lines, func_def.line_num, &system_prompt_lines_end).await; - if func_boundary_opt.is_none() { - continue; - } - let func_boundary = func_boundary_opt.expect("Empty func_boundary_opt"); - function_map.functions.push(FuncDefInfo { - name: func_def.name.clone(), - line_start: func_def.line_num, - line_end: func_boundary.function_boundary as usize, - parent: func_def.parent.clone(), - }); - } - } - log::debug!("[generate_function_map] func_map = {:#?}", &function_map); - all_file_functions.func_map.insert(path.to_str().unwrap().to_string(), function_map); - } - } - return Some(all_file_functions); -} - -async fn get_function_defs_in_chunk(chunk: &str, system_prompt: &str) -> Option { - let llm_req = LlmFuncDefRequest { - input: LlmFuncDefInput { - language: "rust".to_string(), - chunk: chunk.to_string() - } - }; - let llm_req_res = serde_json::to_string(&llm_req); - if llm_req_res.is_err() { - log::error!("[get_function_defs_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); - return None; - } - let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); - let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); - // match call_llm_api(prompt).await { - // None => { - // log::error!("[mermaid_comment] Failed to call LLM API"); - // return None; - // } - // Some(llm_response) => { - // // let funcdefs_res = serde_json::from_str(&llm_response); - // // if funcdefs_res.is_err() { - // // log::error!( - // // "[get_function_defs_in_chunk] funcdefs error: {}", - // // funcdefs_res.expect_err("Empty error in funcdefs_res")); - // // return None; - // // } - // // let funcdefs: LlmFuncDefResponse = funcdefs_res.expect("Uncaught error in funcdefs_res"); - // return Some(funcdefs); - // } - // } - let funcdefs = LlmFuncDefResponse{ functions: vec![LlmFuncDef{ name: "main".to_string(), line_num: 18, parent: "".to_string() }] }; - return Some(funcdefs); -} - -async fn get_function_boundaries_in_chunk(file_lines_numbered: &Vec, func_def_line_num: usize, system_prompt: &str) -> Option { - // divide lines into chunks and call with each chunk until line_end is found or files is empty - let chunk_size = 70; - let mut start = func_def_line_num; - - while start < file_lines_numbered.len() { - let end = std::cmp::min(start + chunk_size, file_lines_numbered.len()); - let chunk: Vec = file_lines_numbered[start..end].to_vec(); - let chunk_str = chunk.join("\n"); - - let input = LlmFuncBoundaryInput { - language: "rust".to_string(), // Assuming Rust as language, you can modify this as needed - func_declared: file_lines_numbered[func_def_line_num].to_string(), - chunk: chunk_str, - }; - let llm_req = LlmFuncBoundaryRequest { input }; - let llm_req_res = serde_json::to_string(&llm_req); - if llm_req_res.is_err() { - log::error!("[get_function_defs_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); - return None; - } - let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); - let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", system_prompt, llm_req_prompt); - // match call_llm_api(prompt).await { - // None => { - // log::error!("[mermaid_comment] Failed to call LLM API"); - // return None; - // } - // Some(llm_response) => { - // let func_resp_res = serde_json::from_str(&llm_response); - // if func_resp_res.is_err() { - // let e = func_resp_res.expect_err("Empty error func_resp_res"); - // log::error!("[get_function_boundaries_in_chunk] Unable to deserialize response"); - // return None; - // } - // let func_resp: LlmFuncBoundaryResponse = func_resp_res.expect("Uncaught error in func_resp_res"); - // if func_resp.function_boundary == -1 { - // start += chunk_size; - // continue; - // } - // return Some(func_resp); - // } - // } - let func_resp = LlmFuncBoundaryResponse { function_boundary: 79 }; - if func_resp.function_boundary == -1 { - start += chunk_size; - continue; - } - return Some(func_resp); - } - return None; -} \ No newline at end of file diff --git a/vibi-dpu/src/llm/mod.rs b/vibi-dpu/src/llm/mod.rs deleted file mode 100644 index c9f75f47..00000000 --- a/vibi-dpu/src/llm/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -pub mod utils; -pub mod gitops; -pub mod function_info; -pub mod mermaid_elements; -pub mod elements; -pub mod function_line_range; \ No newline at end of file diff --git a/vibi-dpu/src/main.rs b/vibi-dpu/src/main.rs index 66b5f06d..6649b54a 100644 --- a/vibi-dpu/src/main.rs +++ b/vibi-dpu/src/main.rs @@ -7,7 +7,7 @@ mod github; mod utils; mod logger; mod health; -mod llm; +mod graph; use github::auth::app_access_token; use health::status::send_status_start; use tokio::task; diff --git a/vibi-dpu/src/utils/gitops.rs b/vibi-dpu/src/utils/gitops.rs index 63083686..f6bcdf7e 100644 --- a/vibi-dpu/src/utils/gitops.rs +++ b/vibi-dpu/src/utils/gitops.rs @@ -7,6 +7,7 @@ use sha256::digest; use rand::distributions::Alphanumeric; use rand::{thread_rng, Rng}; use tokio::fs; +use tokio::task; use std::io::ErrorKind; use super::hunk::BlameItem; @@ -15,7 +16,7 @@ use super::lineitem::LineItem; use crate::db::repo::save_repo_to_db; use crate::utils::repo::Repository; -#[derive(Debug, Serialize, Default, Deserialize)] +#[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct StatItem { pub filepath: String, additions: i32, From 7cb5769e5bac957725917d5a2e4cfc7f676efc01 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Tue, 13 Aug 2024 02:15:50 +0530 Subject: [PATCH 18/43] identify deleted files in diff graph --- vibi-dpu/src/graph/graph_info.rs | 10 +++++----- vibi-dpu/src/graph/mermaid_elements.rs | 2 +- vibi-dpu/src/graph/utils.rs | 17 +++++++++++++---- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs index 46871c7a..8d98c6b7 100644 --- a/vibi-dpu/src/graph/graph_info.rs +++ b/vibi-dpu/src/graph/graph_info.rs @@ -71,20 +71,20 @@ pub async fn generate_full_graph(repo_dir: &str, review_key: &str, commit_id: &s return Some(graph_info); } -pub async fn generate_diff_graph(diff_files: &Vec) -> Option { - let diff_code_files_opt = source_diff_files(diff_files); +pub async fn generate_diff_graph(diff_files: &Vec) -> (Option, Option>) { + let (diff_code_files_opt, deleted_files_opt) = source_diff_files(diff_files); if diff_code_files_opt.is_none() { log::error!("[generate_diff_graph] Unable to get file paths for: {:#?}", diff_files); - return None; + return (None, deleted_files_opt); } let diff_code_files = diff_code_files_opt.expect("Empty diff_code_files_opt"); let graph_info_opt = generate_graph_info(&diff_code_files).await; if graph_info_opt.is_none() { log::error!("[generate_diff_graph] Unable to generate diff graph"); - return None; + return (None, deleted_files_opt); } let graph_info = graph_info_opt.expect("Empty graph_info_opt"); - return Some(graph_info); + return (Some(graph_info), deleted_files_opt); } fn added_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index 654f694f..0094f1c0 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -35,7 +35,7 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review let full_graph = full_graph_opt.expect("Empty full_graph_opt"); // generate diff graph for head commit id git_checkout_commit(review, review.pr_head_commit()); - let diff_graph_opt = generate_diff_graph(diff_files).await; + let (diff_graph_opt, deleted_files_opt) = generate_diff_graph(diff_files).await; if diff_graph_opt.is_none() { log::error!( "[generate_flowchart_elements] Unable to generate diff graph for review: {}", diff --git a/vibi-dpu/src/graph/utils.rs b/vibi-dpu/src/graph/utils.rs index 5ac53436..a0e31e09 100644 --- a/vibi-dpu/src/graph/utils.rs +++ b/vibi-dpu/src/graph/utils.rs @@ -136,19 +136,28 @@ pub fn all_code_files(dir: &str) -> Option> { return Some(code_files); } -pub fn source_diff_files(diff_files: &Vec) -> Option> { +pub fn source_diff_files(diff_files: &Vec) -> (Option>, Option>) { let mut code_files = Vec::::new(); + let mut deleted_files = Vec::::new(); for stat_item in diff_files { let filepath_str = &stat_item.filepath; let filepath = Path::new(filepath_str); if filepath.extension().and_then(|ext| ext.to_str()) == Some("rs") { code_files.push(filepath.to_path_buf()); } + if !filepath.exists() { + deleted_files.push(filepath.to_path_buf()); + } } - if code_files.is_empty() { - return None; + let mut code_files_retval = None; + let mut deleted_files_retval = None; + if !code_files.is_empty() { + code_files_retval = Some(code_files); } - return Some(code_files); + if !deleted_files.is_empty() { + deleted_files_retval = Some(deleted_files); + } + return (code_files_retval, deleted_files_retval); } pub fn numbered_content(file_contents: String) -> Vec { From 0db988d5a55356992be41517d2c5bf222868d0d7 Mon Sep 17 00:00:00 2001 From: Avikalp Kumar Gupta Date: Tue, 3 Sep 2024 01:36:13 +0530 Subject: [PATCH 19/43] separated the relevance & mermaid graph processing functions --- vibi-dpu/src/core/review.rs | 38 ++++++++++++++++++++++++++++++------ vibi-dpu/src/core/trigger.rs | 4 +--- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/vibi-dpu/src/core/review.rs b/vibi-dpu/src/core/review.rs index 0bf67561..687188ae 100644 --- a/vibi-dpu/src/core/review.rs +++ b/vibi-dpu/src/core/review.rs @@ -41,8 +41,20 @@ pub async fn process_review(message_data: &Vec) { } let access_token = access_token_opt.expect("Empty access_token_opt"); commit_check(&review, &access_token).await; - let hunkmap_opt = process_review_changes(&review).await; - send_hunkmap(&hunkmap_opt, &review, &repo_config, &access_token, &old_review_opt).await; + process_review_changes(&review, &repo_config, &access_token, &old_review_opt).await; +} + +pub async fn process_review_changes(review: &Review, repo_config: &RepoConfig, access_token: &str, old_review_opt: &Option) -> Option<(HunkMap, Vec, Vec)>{ + log::info!("Processing changes in code..."); + let (excluded_files, smallfiles) = get_included_and_excluded_files(&review).await; + if repo_config.comment() || repo_config.auto_assign() { + let hunkmap_opt = calculate_hunkmap(&review, &smallfiles).await; + send_hunkmap(&hunkmap_opt, &review, &repo_config, &access_token, &old_review_opt).await; + } + + if let Some(mermaid_graph) = generate_mermaid_graph(&excluded_files, &small_files, &review).await { + send_mermaid_graph(&mermaid_graph, &review, &access_token).await; + } } pub async fn send_hunkmap(hunkmap_opt: &Option<(HunkMap, Vec, Vec)>, review: &Review, @@ -73,9 +85,8 @@ fn hunk_already_exists(review: &Review) -> bool { log::debug!("[hunk_already_exists] Hunk already in db!"); return true; } -pub async fn process_review_changes(review: &Review) -> Option<(HunkMap, Vec, Vec)>{ - log::info!("Processing changes in code..."); - let mut prvec = Vec::::new(); + +fn get_included_and_excluded_files(review: &Review) -> Option<(Vec, Vec)> { let fileopt = get_excluded_files(&review); log::debug!("[process_review_changes] fileopt = {:?}", &fileopt); if fileopt.is_none() { @@ -83,6 +94,11 @@ pub async fn process_review_changes(review: &Review) -> Option<(HunkMap, Vec) -> Option { + let mut prvec = Vec::::new(); let diffmap = generate_diff(&review, &smallfiles); log::debug!("[process_review_changes] diffmap = {:?}", &diffmap); let linemap = process_diffmap(&diffmap); @@ -102,7 +118,17 @@ pub async fn process_review_changes(review: &Review) -> Option<(HunkMap, Vec, small_files: &Vec, review: &Review) -> Option { + let all_diff_files: Vec = excluded_files + .iter() + .chain(small_files.iter()) + .cloned() + .collect(); + + mermaid_comment(&all_diff_files, review).await } pub async fn commit_check(review: &Review, access_token: &str) { diff --git a/vibi-dpu/src/core/trigger.rs b/vibi-dpu/src/core/trigger.rs index 4a5775f7..947bef85 100644 --- a/vibi-dpu/src/core/trigger.rs +++ b/vibi-dpu/src/core/trigger.rs @@ -48,9 +48,7 @@ pub async fn process_trigger(message_data: &Vec) { // commit_check commit_check(&review, &access_token).await; // process_review_changes - let hunkmap_opt = process_review_changes(&review).await; - // send_hunkmap - send_hunkmap(&hunkmap_opt, &review, &repo_config, &access_token, &None).await; + process_review_changes(&review, &repo_config, &access_token, &None).await; } fn parse_message_fields(msg: &Value) -> Option { From bead6d2d2c327fb4425b578e888e44f05352e28c Mon Sep 17 00:00:00 2001 From: Avikalp Kumar Gupta Date: Tue, 3 Sep 2024 02:56:07 +0530 Subject: [PATCH 20/43] create diff_graph in core to handle DiffGraph creation directly from process_review_changes --- vibi-dpu/src/core/diff_graph.rs | 46 +++++++++++++++++++++++++++++++ vibi-dpu/src/core/mod.rs | 3 +- vibi-dpu/src/core/relevance.rs | 34 ++++------------------- vibi-dpu/src/core/review.rs | 41 ++++++++++++--------------- vibi-dpu/src/utils/repo_config.rs | 10 +++++-- 5 files changed, 78 insertions(+), 56 deletions(-) create mode 100644 vibi-dpu/src/core/diff_graph.rs diff --git a/vibi-dpu/src/core/diff_graph.rs b/vibi-dpu/src/core/diff_graph.rs new file mode 100644 index 00000000..491d1d99 --- /dev/null +++ b/vibi-dpu/src/core/diff_graph.rs @@ -0,0 +1,46 @@ +use crate::graph::mermaid_elements::generate_mermaid_flowchart; +use crate::utils::user::ProviderEnum; +use crate::utils::review::Review; +use crate::core::github; +use crate::utils::gitops::StatItem; + +pub async fn send_diff_graph(review: &Review, excluded_files: &Vec, small_files: &Vec, access_token: &str) { + let comment = diff_graph_comment_text(excluded_files, small_files, review).await; + // add comment for GitHub + if review.provider().to_string() == ProviderEnum::Github.to_string() { + log::info!("Inserting comment on repo {}...", review.repo_name()); + github::comment::add_comment(&comment, review, &access_token).await; + } + + // TODO: add comment for Bitbucket +} + +async fn diff_graph_comment_text(excluded_files: &Vec, small_files: &Vec, review: &Review) -> String { + let mut comment = "Relevant users for this PR:\n\n".to_string(); + + let all_diff_files: Vec = excluded_files + .iter() + .chain(small_files.iter()) + .cloned() // Clone the StatItem instances since `iter` returns references + .collect(); // Collect into a new vector + if let Some(mermaid_text) = mermaid_comment(&all_diff_files, review).await { + comment += mermaid_text.as_str(); + } + comment += "To modify DiffGraph settings, go to [your Vibinex settings page.](https://vibinex.com/settings)\n"; + return comment; +} + +async fn mermaid_comment(diff_files: &Vec, review: &Review) -> Option { + let flowchart_str_opt = generate_mermaid_flowchart(diff_files, review).await; + if flowchart_str_opt.is_none() { + log::error!("[mermaid_comment] Unable to generate flowchart for review: {}", review.id()); + return None; + } + let flowchart_str = flowchart_str_opt.expect("Empty flowchart_str_opt"); + let mermaid_comment = format!( + "### Call Stack Diff\n```mermaid\n{}\n```", + flowchart_str, + ); + return Some(mermaid_comment); +} + diff --git a/vibi-dpu/src/core/mod.rs b/vibi-dpu/src/core/mod.rs index a28ba3ea..5c685fbc 100644 --- a/vibi-dpu/src/core/mod.rs +++ b/vibi-dpu/src/core/mod.rs @@ -4,4 +4,5 @@ pub mod utils; pub mod approval; pub mod bitbucket; pub mod github; -pub mod trigger; \ No newline at end of file +pub mod trigger; +pub mod diff_graph; \ No newline at end of file diff --git a/vibi-dpu/src/core/relevance.rs b/vibi-dpu/src/core/relevance.rs index dab7d9f9..97807eae 100644 --- a/vibi-dpu/src/core/relevance.rs +++ b/vibi-dpu/src/core/relevance.rs @@ -1,10 +1,10 @@ use std::collections::{HashMap, HashSet}; -use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, graph::mermaid_elements::generate_mermaid_flowchart, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; +use crate::{bitbucket::{self, user::author_from_commit}, core::github, db::review::save_review_to_db, utils::{aliases::get_login_handles, gitops::StatItem, hunk::{HunkMap, PrHunkItem}, relevance::Relevance, user::ProviderEnum}}; use crate::utils::review::Review; use crate::utils::repo_config::RepoConfig; -pub async fn process_relevance(hunkmap: &HunkMap, excluded_files: &Vec, small_files: &Vec, review: &Review, +pub async fn process_relevance(hunkmap: &HunkMap, excluded_files: &Vec, review: &Review, repo_config: &mut RepoConfig, access_token: &str, old_review_opt: &Option, ) { log::info!("Processing relevance of code authors..."); @@ -22,8 +22,7 @@ pub async fn process_relevance(hunkmap: &HunkMap, excluded_files: &Vec let relevance_vec = relevance_vec_opt.expect("Empty coverage_obj_opt"); if repo_config.comment() { // create comment text - let comment = comment_text(&relevance_vec, repo_config.auto_assign(), - excluded_files, small_files, review).await; + let comment = relevant_reviewers_comment_text(&relevance_vec, repo_config.auto_assign(), excluded_files).await; // add comment if review.provider().to_string() == ProviderEnum::Bitbucket.to_string() { // TODO - add feature flag check @@ -185,8 +184,8 @@ async fn calculate_relevance(prhunk: &PrHunkItem, review: &mut Review) -> Option return Some(relevance_vec); } -async fn comment_text(relevance_vec: &Vec, auto_assign: bool, - excluded_files: &Vec, small_files: &Vec, review: &Review) -> String { +async fn relevant_reviewers_comment_text(relevance_vec: &Vec, auto_assign: bool, + excluded_files: &Vec) -> String { let mut comment = "Relevant users for this PR:\n\n".to_string(); // Added two newlines comment += "| Contributor Name/Alias | Relevance |\n"; // Added a newline at the end comment += "| -------------- | --------------- |\n"; // Added a newline at the end @@ -226,32 +225,9 @@ async fn comment_text(relevance_vec: &Vec, auto_assign: bool, comment += "If you are a relevant reviewer, you can use the [Vibinex browser extension](https://chromewebstore.google.com/detail/vibinex-code-review/jafgelpkkkopeaefadkdjcmnicgpcncc) to see parts of the PR relevant to you\n"; // Added a newline at the end comment += "Relevance of the reviewer is calculated based on the git blame information of the PR. To know more, hit us up at contact@vibinex.com.\n\n"; // Added two newlines comment += "To change comment and auto-assign settings, go to [your Vibinex settings page.](https://vibinex.com/u)\n"; // Added a newline at the end - let all_diff_files: Vec = excluded_files - .iter() - .chain(small_files.iter()) - .cloned() // Clone the StatItem instances since `iter` returns references - .collect(); // Collect into a new vector - if let Some(mermaid_text) = mermaid_comment(&all_diff_files, review).await { - comment += mermaid_text.as_str(); - } - return comment; } -pub async fn mermaid_comment(diff_files: &Vec, review: &Review) -> Option { - let flowchart_str_opt = generate_mermaid_flowchart(diff_files, review).await; - if flowchart_str_opt.is_none() { - log::error!("[mermaid_comment] Unable to generate flowchart for review: {}", review.id()); - return None; - } - let flowchart_str = flowchart_str_opt.expect("Empty flowchart_str_opt"); - let mermaid_comment = format!( - "### Call Stack Diff\n```mermaid\n{}\n```", - flowchart_str, - ); - return Some(mermaid_comment); -} - pub fn deduplicated_relevance_vec_for_comment(relevance_vec: &Vec) -> (HashMap, f32>, Vec) { let mut combined_relevance_map: HashMap, f32> = HashMap::new(); let mut unmapped_aliases = Vec::new(); diff --git a/vibi-dpu/src/core/review.rs b/vibi-dpu/src/core/review.rs index 687188ae..c54a2770 100644 --- a/vibi-dpu/src/core/review.rs +++ b/vibi-dpu/src/core/review.rs @@ -3,7 +3,7 @@ use std::{env, thread, time::Duration}; use serde_json::Value; use crate::{ - core::{relevance::process_relevance, utils::get_access_token}, + core::{relevance::process_relevance, diff_graph::send_diff_graph, utils::get_access_token}, db::{ hunk::{get_hunk_from_db, store_hunkmap_to_db}, repo::get_clone_url_clone_dir, @@ -44,33 +44,36 @@ pub async fn process_review(message_data: &Vec) { process_review_changes(&review, &repo_config, &access_token, &old_review_opt).await; } -pub async fn process_review_changes(review: &Review, repo_config: &RepoConfig, access_token: &str, old_review_opt: &Option) -> Option<(HunkMap, Vec, Vec)>{ +pub async fn process_review_changes(review: &Review, repo_config: &RepoConfig, access_token: &str, old_review_opt: &Option) { log::info!("Processing changes in code..."); - let (excluded_files, smallfiles) = get_included_and_excluded_files(&review).await; - if repo_config.comment() || repo_config.auto_assign() { - let hunkmap_opt = calculate_hunkmap(&review, &smallfiles).await; - send_hunkmap(&hunkmap_opt, &review, &repo_config, &access_token, &old_review_opt).await; - } - - if let Some(mermaid_graph) = generate_mermaid_graph(&excluded_files, &small_files, &review).await { - send_mermaid_graph(&mermaid_graph, &review, &access_token).await; + if let Some((excluded_files, smallfiles)) = get_included_and_excluded_files(review) { + if repo_config.comment() || repo_config.auto_assign() { + let hunkmap_opt = calculate_hunkmap(review, &smallfiles).await; + send_hunkmap(&hunkmap_opt, &excluded_files, review, repo_config, access_token, old_review_opt).await; + } + + if repo_config.diff_graph() { + send_diff_graph(review, &excluded_files, &smallfiles, access_token).await; + } + } else { + log::error!("Failed to get included and excluded files"); } } -pub async fn send_hunkmap(hunkmap_opt: &Option<(HunkMap, Vec, Vec)>, review: &Review, +pub async fn send_hunkmap(hunkmap_opt: &Option, excluded_files: &Vec, review: &Review, repo_config: &RepoConfig, access_token: &str, old_review_opt: &Option) { if hunkmap_opt.is_none() { log::error!("[send_hunkmap] Empty hunkmap in send_hunkmap"); return; } - let (hunkmap, excluded_files, small_files) = hunkmap_opt.as_ref().expect("empty hunkmap_opt"); + let hunkmap = hunkmap_opt.to_owned().expect("empty hunkmap_opt"); log::debug!("HunkMap = {:?}", &hunkmap); store_hunkmap_to_db(&hunkmap, review); publish_hunkmap(&hunkmap); let hunkmap_async = hunkmap.clone(); let review_async = review.clone(); let mut repo_config_clone = repo_config.clone(); - process_relevance(&hunkmap_async, excluded_files, small_files, &review_async, + process_relevance(&hunkmap_async, &excluded_files, &review_async, &mut repo_config_clone, access_token, old_review_opt).await; } @@ -97,7 +100,7 @@ fn get_included_and_excluded_files(review: &Review) -> Option<(Vec, Ve return Some(( excluded_files, smallfiles)); } -async fn calculate_hunkmap(review: &Review, smallfiles: Vec) -> Option { +async fn calculate_hunkmap(review: &Review, smallfiles: &Vec) -> Option { let mut prvec = Vec::::new(); let diffmap = generate_diff(&review, &smallfiles); log::debug!("[process_review_changes] diffmap = {:?}", &diffmap); @@ -121,16 +124,6 @@ async fn calculate_hunkmap(review: &Review, smallfiles: Vec) -> Option return Some(hunkmap); } -async fn generate_mermaid_graph(excluded_files: &Vec, small_files: &Vec, review: &Review) -> Option { - let all_diff_files: Vec = excluded_files - .iter() - .chain(small_files.iter()) - .cloned() - .collect(); - - mermaid_comment(&all_diff_files, review).await -} - pub async fn commit_check(review: &Review, access_token: &str) { if !commit_exists(&review.base_head_commit(), &review.clone_dir()) || !commit_exists(&review.pr_head_commit(), &review.clone_dir()) { diff --git a/vibi-dpu/src/utils/repo_config.rs b/vibi-dpu/src/utils/repo_config.rs index 3f74e42a..432a8fd5 100644 --- a/vibi-dpu/src/utils/repo_config.rs +++ b/vibi-dpu/src/utils/repo_config.rs @@ -3,7 +3,8 @@ use serde::{Serialize, Deserialize}; #[derive(Debug, Serialize, Deserialize, Clone)] pub struct RepoConfig { comment: bool, - auto_assign: bool + auto_assign: bool, + diff_graph: bool } impl RepoConfig { @@ -16,11 +17,16 @@ impl RepoConfig { self.auto_assign } + pub fn diff_graph(&self) -> bool { + self.diff_graph + } + // Function to create a default RepoConfig pub fn default() -> Self { RepoConfig { comment: true, - auto_assign: true + auto_assign: true, + diff_graph: false } } } \ No newline at end of file From a4a7e1ee8659ad27111037b159a3f188ae2271bf Mon Sep 17 00:00:00 2001 From: Avikalp Kumar Gupta Date: Tue, 3 Sep 2024 15:49:05 +0530 Subject: [PATCH 21/43] fix: review: remove the repo-config conditions from calling send_hunk_map function --- vibi-dpu/src/core/review.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vibi-dpu/src/core/review.rs b/vibi-dpu/src/core/review.rs index c54a2770..5b0887c8 100644 --- a/vibi-dpu/src/core/review.rs +++ b/vibi-dpu/src/core/review.rs @@ -47,10 +47,8 @@ pub async fn process_review(message_data: &Vec) { pub async fn process_review_changes(review: &Review, repo_config: &RepoConfig, access_token: &str, old_review_opt: &Option) { log::info!("Processing changes in code..."); if let Some((excluded_files, smallfiles)) = get_included_and_excluded_files(review) { - if repo_config.comment() || repo_config.auto_assign() { - let hunkmap_opt = calculate_hunkmap(review, &smallfiles).await; - send_hunkmap(&hunkmap_opt, &excluded_files, review, repo_config, access_token, old_review_opt).await; - } + let hunkmap_opt = calculate_hunkmap(review, &smallfiles).await; + send_hunkmap(&hunkmap_opt, &excluded_files, review, repo_config, access_token, old_review_opt).await; if repo_config.diff_graph() { send_diff_graph(review, &excluded_files, &smallfiles, access_token).await; From c7bba0a39fb5586f9b54c8c88665104602167be7 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Tue, 3 Sep 2024 15:50:03 +0530 Subject: [PATCH 22/43] Implement edge rendering and adjacency list --- vibi-dpu/Cargo.toml | 1 + vibi-dpu/src/db/graph_info.rs | 12 +- vibi-dpu/src/graph/elements.rs | 184 +++++--- vibi-dpu/src/graph/file_imports.rs | 17 +- vibi-dpu/src/graph/function_call.rs | 137 ++++++ vibi-dpu/src/graph/function_info.rs | 286 ------------ vibi-dpu/src/graph/function_line_range.rs | 51 ++- vibi-dpu/src/graph/gitops.rs | 150 ++++++- vibi-dpu/src/graph/graph_edges.rs | 325 +++++++++++++- vibi-dpu/src/graph/graph_info.rs | 506 +++++++++++++++------- vibi-dpu/src/graph/mermaid_elements.rs | 449 +++++++++---------- vibi-dpu/src/graph/mod.rs | 3 +- vibi-dpu/src/graph/utils.rs | 32 +- 13 files changed, 1368 insertions(+), 785 deletions(-) create mode 100644 vibi-dpu/src/graph/function_call.rs delete mode 100644 vibi-dpu/src/graph/function_info.rs diff --git a/vibi-dpu/Cargo.toml b/vibi-dpu/Cargo.toml index a021f9bc..2939593d 100644 --- a/vibi-dpu/Cargo.toml +++ b/vibi-dpu/Cargo.toml @@ -38,4 +38,5 @@ jsonwebtoken = "8.3.0" # MIT fern = "0.6.2" # MIT log = "0.4.20" # MIT/Apache2 walkdir = "2.5.0" # Unlicence/MIT +strsim = "0.11.1" #MIT # todo - check all lib licences diff --git a/vibi-dpu/src/db/graph_info.rs b/vibi-dpu/src/db/graph_info.rs index 0e0d1d2c..8b349574 100644 --- a/vibi-dpu/src/db/graph_info.rs +++ b/vibi-dpu/src/db/graph_info.rs @@ -1,11 +1,11 @@ use sled::IVec; -use crate::{db::config::get_db, graph::graph_info::GraphInfo}; -pub fn save_graph_info_to_db(review_key: &str, commit_id: &str, graph_info: &GraphInfo) { +use crate::{db::config::get_db, graph::file_imports::AllFileImportInfo}; +pub fn save_import_info_to_db(review_key: &str, commit_id: &str, all_imports: &AllFileImportInfo) { let db = get_db(); let graph_info_key = format!("graph_info/{}/{}", review_key, commit_id); // Serialize repo struct to JSON - let json = serde_json::to_vec(graph_info).expect("Failed to serialize review"); + let json = serde_json::to_vec(all_imports).expect("Failed to serialize review"); // Insert JSON into sled DB let insert_res = db.insert(IVec::from(graph_info_key.as_bytes()), json); if insert_res.is_err() { @@ -13,10 +13,10 @@ pub fn save_graph_info_to_db(review_key: &str, commit_id: &str, graph_info: &Gra log::error!("[save_graph_info_to_db] Failed to upsert graph info into sled DB: {e}"); return; } - log::debug!("[save_graph_info_to_db] Graph Info succesfully upserted: {:#?}", graph_info); + log::debug!("[save_graph_info_to_db] Graph Info succesfully upserted: {:#?}", all_imports); } -pub fn get_graph_info_from_db(review_key: &str, commit_id: &str) -> Option { +pub fn get_import_info_from_db(review_key: &str, commit_id: &str) -> Option { let db = get_db(); let graph_info_key = format!("graph_info/{}/{}", review_key, commit_id); let graph_info_res = db.get(IVec::from(graph_info_key.as_bytes())); @@ -40,6 +40,6 @@ pub fn get_graph_info_from_db(review_key: &str, commit_id: &str) -> Option>>, - mermaid_id: String + nodes: HashMap>>, + mermaid_id: String, } impl MermaidSubgraph { // Constructor pub fn new(name: String) -> Self { let mermaid_id = generate_random_string(4); - Self { name, nodes: HashMap::new(), mermaid_id } + Self { + name, + nodes: HashMap::new(), + mermaid_id, + } } // Getter for nodes - pub fn nodes(&self) -> &HashMap>> { - self.nodes.borrow() + pub fn nodes(&self) -> &HashMap>> { + &self.nodes } pub fn mermaid_id(&self) -> &String { @@ -27,28 +36,35 @@ impl MermaidSubgraph { } // Setter for nodes - pub fn set_nodes(&mut self, nodes: HashMap>>) { + pub fn set_nodes(&mut self, nodes: HashMap>>) { self.nodes = nodes; } - pub fn add_node(&mut self, node: Rc>) { + pub fn add_node(&mut self, node: &Arc>) { + let node_owned = Arc::clone(node); let function_name = { - let node_borrowed: Ref = RefCell::borrow(&*node); + let node_borrowed = node_owned.lock().unwrap(); node_borrowed.function_name().to_string() }; if self.nodes.contains_key(&function_name) { log::error!( "[add_node] Node already exists: old - {:#?}, new - {:#?}", - &self.nodes[&function_name], node); + &self.nodes[&function_name], + node + ); return; } - self.nodes.insert(function_name, node); + self.nodes.insert(function_name, node_owned); + } + + pub fn get_node(&self, func_name: &str) -> Option<&Arc>> { + self.nodes.get(func_name) } - pub fn render_subgraph(&self) -> String{ + pub fn render_subgraph(&self) -> String { let mut all_nodes = Vec::new(); for (_, node) in self.nodes() { - let node_borrowed = RefCell::borrow(&*node); + let node_borrowed = node.lock().unwrap(); all_nodes.push(node_borrowed.render_node()); } let subgraph_str = format!( @@ -57,8 +73,7 @@ impl MermaidSubgraph { self.name, all_nodes.join("\n") ); - // self.set_subgraph_str(Some(subgraph_str)); - return subgraph_str; + subgraph_str } } @@ -70,9 +85,12 @@ pub struct MermaidNode { impl MermaidNode { // Constructor - pub fn new( function_name: String) -> Self { + pub fn new(function_name: String) -> Self { let mermaid_id = generate_random_string(4); - Self { mermaid_id, function_name } + Self { + mermaid_id, + function_name, + } } // Getter for function_name @@ -92,30 +110,35 @@ impl MermaidNode { pub fn render_node(&self) -> String { let node_str = format!("\t{}[{}]", &self.mermaid_id, &self.function_name); - // self.set_node_str(Some(node_str.clone())); - return node_str; + node_str } } #[derive(Debug, Default, Clone)] pub struct MermaidEdge { line: usize, - caller_function: Rc>, - called_function: Rc>, + caller_function: Arc>, + called_function: Arc>, color: String, } impl MermaidEdge { // Constructor - pub fn new(line: usize, caller_function: &Rc>, called_function: &Rc>, color: String) -> Self { + pub fn new( + line: usize, + caller_function: &Arc>, + called_function: &Arc>, + color: String, + ) -> Self { Self { line, - caller_function: Rc::clone(caller_function), - called_function: Rc::clone(called_function), - color } + caller_function: Arc::clone(caller_function), + called_function: Arc::clone(called_function), + color, + } } - // Getter for edge_str + // Getter for line pub fn line(&self) -> usize { self.line } @@ -136,25 +159,20 @@ impl MermaidEdge { pub fn render_edge_definition(&self) -> String { let (caller_str, called_str) = { - let caller_borrowed: Ref = RefCell::borrow(&*self.caller_function); - let called_borrowed: Ref = RefCell::borrow(&*self.called_function); - (caller_borrowed.function_name().to_string(), called_borrowed.function_name().to_string()) + let caller_borrowed = self.caller_function.lock().unwrap(); + let called_borrowed = self.called_function.lock().unwrap(); + ( + caller_borrowed.function_name().to_string(), + called_borrowed.function_name().to_string(), + ) }; - let edge_str = format!( - "\t{} -- Line {} --> {}\n", - caller_str, - self.line, - called_str, - ); - return edge_str; + let edge_str = format!("\t{} -- Line {} --> {}\n", caller_str, self.line, called_str); + edge_str } pub fn render_edge_style(&self) -> String { - let style_str = format!( - "stroke:{},stroke-width:4px;", - self.color() - ); - return style_str; + let style_str = format!("stroke:{},stroke-width:4px;", self.color()); + style_str } } @@ -172,16 +190,61 @@ impl MermaidGraphElements { } } - pub fn add_edge(&mut self, edge: MermaidEdge, from_subgraph: &MermaidSubgraph, to_subgraph: &MermaidSubgraph) { + pub fn subgraph_for_file(&self, file: &str) -> Option<&MermaidSubgraph> { + self.subgraphs.get(file) + } + + pub fn add_edge( + &mut self, + edge_color: &str, + line: usize, + source_func_name: &str, + dest_func_name: &str, + source_file: &str, + dest_file: &str, + ) { + let source_node: Arc>; + let dest_node: Arc>; + + if let Some(subgraph) = self.subgraphs.get_mut(source_file) { + if let Some(node) = subgraph.get_node(source_func_name) { + source_node = Arc::clone(node); + } else { + let node = MermaidNode::new(source_func_name.to_string()); + source_node = Arc::new(Mutex::new(node)); + subgraph.add_node(&source_node); + } + } else { + let node = MermaidNode::new(source_func_name.to_string()); + source_node = Arc::new(Mutex::new(node)); + let mut subgraph = MermaidSubgraph::new(source_file.to_string()); + subgraph.add_node(&source_node); + self.add_subgraph(subgraph); + } + + if let Some(subgraph) = self.subgraphs.get_mut(dest_file) { + if let Some(node) = subgraph.get_node(dest_func_name) { + dest_node = Arc::clone(node); + } else { + let node = MermaidNode::new(dest_func_name.to_string()); + dest_node = Arc::new(Mutex::new(node)); + subgraph.add_node(&dest_node); + } + } else { + let node = MermaidNode::new(dest_func_name.to_string()); + dest_node = Arc::new(Mutex::new(node)); + let mut subgraph = MermaidSubgraph::new(dest_file.to_string()); + subgraph.add_node(&dest_node); + self.add_subgraph(subgraph); + } + + let edge = MermaidEdge::new(line, &source_node, &dest_node, edge_color.to_string()); self.edges.push(edge); - self.add_subgraph(from_subgraph); - self.add_subgraph(to_subgraph); } - fn add_subgraph(&mut self, subgraph: &MermaidSubgraph) { + fn add_subgraph(&mut self, subgraph: MermaidSubgraph) { if !self.subgraphs.contains_key(subgraph.mermaid_id()) { - self.subgraphs.insert(subgraph.mermaid_id().to_string(), - subgraph.to_owned()); + self.subgraphs.insert(subgraph.mermaid_id().to_string(), subgraph); } } @@ -190,27 +253,22 @@ impl MermaidGraphElements { let mut all_edges_style = Vec::::new(); for (idx, edge) in self.edges.iter().enumerate() { all_edges.push(edge.render_edge_definition()); - all_edges_style.push( - format!("\tlinkStyle {} {}", idx, edge.render_edge_style()) - ); + all_edges_style.push(format!("\tlinkStyle {} {}", idx, edge.render_edge_style())); } - let all_edges_str = format!( - "{}{}", - all_edges.join("\n"), - all_edges_style.join("\n") - ); - return all_edges_str; + let all_edges_str = format!("{}{}", all_edges.join("\n"), all_edges_style.join("\n")); + all_edges_str } fn render_subgraphs(&self) -> String { - return self.subgraphs.values().map( - |subgraph| subgraph.render_subgraph() - ).collect::>().join("\n"); + self.subgraphs + .values() + .map(|subgraph| subgraph.render_subgraph()) + .collect::>() + .join("\n") } pub fn render_elements(&self) -> String { - let all_elements_str = format!("{}\n{}", - &self.render_subgraphs(), &self.render_edges()); - return all_elements_str; + let all_elements_str = format!("{}\n{}", &self.render_subgraphs(), &self.render_edges()); + all_elements_str } -} \ No newline at end of file +} diff --git a/vibi-dpu/src/graph/file_imports.rs b/vibi-dpu/src/graph/file_imports.rs index 9984751d..33ff5b71 100644 --- a/vibi-dpu/src/graph/file_imports.rs +++ b/vibi-dpu/src/graph/file_imports.rs @@ -46,7 +46,17 @@ impl PartialEq for ImportPath { fn eq(&self, other: &Self) -> bool { self.import_line == other.import_line && self.import_path == other.import_path && self.imported == other.imported } -} +} + +impl ImportPath { + pub fn import_path(&self) -> &String { + &self.import_path + } + + pub fn imported(&self) -> &String { + &self.imported + } +} #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct ChunkImportInfo { @@ -98,6 +108,10 @@ impl AllFileImportInfo { pub fn file_import_info(&self, filename: &str) -> Option<&FileImportInfo> { self.file_import_map.get(filename) } + + pub fn file_import_map(&self) -> &HashMap { + &self.file_import_map + } } pub async fn get_import_lines(file_paths: &Vec) -> Option { @@ -116,6 +130,7 @@ pub async fn get_import_lines(file_paths: &Vec) -> Option +} + +impl FunctionCallChunk { + pub fn function_calls(&self) -> &Vec { + &self.function_calls + } +} + +#[derive(Debug, Serialize, Default, Deserialize, Clone)] +pub struct FunctionCallInput { + pub language: String, + pub chunk: String, + pub function_name: String +} + +pub async fn function_calls_in_chunk(chunk: &str, func_name: &str) -> Option{ + let system_prompt_opt = read_file("/app/prompts/prompt_function_call"); + if system_prompt_opt.is_none() { + log::error!("[function_calls_in_chunk] Unable to read prompt_function_call"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let func_call_input = FunctionCallInput{ + language: "rust".to_string(), + chunk: chunk.to_string(), + function_name: func_name.to_string() }; + let func_call_input_res = serde_json::to_string(&func_call_input); + if func_call_input_res.is_err() { + let e = func_call_input_res.expect_err("Empty error in func_call_input_res"); + log::error!("[function_calls_in_chunk] Error serializing func call input: {:?}", e); + return None; + } + let func_call_input_str = func_call_input_res.expect("Uncaught error in func_call_input_res"); + let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", + system_prompt_lines, &func_call_input_str); + let prompt_response_opt = call_llm_api(prompt).await; + if prompt_response_opt.is_none() { + log::error!("[function_calls_in_chunk] Unable to call llm for chunk: {:?}", chunk); + return None; + } + let prompt_response = prompt_response_opt.expect("Empty prompt_response_opt"); + let deserialized_response = serde_json::from_str(&prompt_response); + if deserialized_response.is_err() { + let e = deserialized_response.expect_err("Empty error in deserialized_response"); + log::error!("[function_calls_in_chunk] Error in deserializing response: {:?}", e); + return None; + } + let func_call_chunk: FunctionCallChunk = deserialized_response.expect("Uncuaght error in deserialized_response"); + if func_call_chunk.function_calls.is_empty() { + log::debug!("No function calls in this chunk: {:?}", chunk); + return None; + } + return Some(func_call_chunk); +} + +pub async fn function_calls_in_file(filepath: &PathBuf, func_name: &str) -> Option> { + let mut all_func_calls = Vec::::new(); + let file_contents = std::fs::read_to_string(filepath.clone()).ok()?; + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(50); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + let func_call_chunk_opt = function_calls_in_chunk(&chunk_str, func_name).await; + if func_call_chunk_opt.is_none() { + log::debug!("[function_calls_in_file] No function call in chunk for file: {:?}", filepath); + continue; + } + let func_call_chunk = func_call_chunk_opt.expect("Empty func_call_chunk_opt"); + all_func_calls.push(func_call_chunk); + } + if all_func_calls.is_empty() { + log::debug!("[function_calls_in_file] No function call in file: {:?}, {:?}", filepath, func_name); + return None; + } + return Some(all_func_calls); +} + +// pub async fn function_calls_in_hunks(hunk_file_map: &HunkDiffMap) -> Option>>> { +// let mut file_func_call_map: HashMap>> = HashMap::new(); +// for (file, hunk_lines_vec) in hunk_file_map.file_line_map() { +// let file_contents_res = std::fs::read_to_string(file.clone()); +// if file_contents_res.is_err() { +// let e = file_contents_res.expect_err("Empty error in file_contents_res"); +// log::error!("[function_calls_in_hunks] Error in getting file contents: {:?}", e); +// continue; +// } +// let file_contents = file_contents_res.expect("Uncaught error in file_contents_res"); +// let numbered_content = numbered_content(file_contents); +// let file_path = Path::new(file); +// let file_vec = vec![file_path.to_path_buf()]; +// let imports_opt = get_import_lines(&file_vec).await; +// if imports_opt.is_none() { +// log::debug!("[function_calls_in_hunks] No imports in file: {:?}", file); +// continue; +// } +// let file_imports = imports_opt.expect("Empty imports_opt"); +// let file_import_info = file_imports.file_import_info(file).expect("Empty file_import_info"); +// let mut func_call_map: HashMap> = HashMap::new(); +// for import_info in file_import_info.all_import_paths() { +// let func_name = import_info.imported(); +// // TODO FIXME - get numbered content for hunk +// for hunk_lines in hunk_lines_vec { +// let mut func_call_vec: Vec = Vec::new(); +// let hunk_chunk_vec = &numbered_content[hunk_lines.start_line().. hunk_lines.end_line()]; +// for hunk_chunk in hunk_chunk_vec.chunks(30) { +// let hunk_str = hunk_chunk.join("\n"); +// if let Some(func_calls) = function_calls_in_chunk(&hunk_str, func_name).await { +// func_call_vec.extend(func_calls.function_calls()); +// } +// } +// if !func_call_vec.is_empty() { +// func_call_map.entry(func_name.to_string()).or_insert_with(Vec::new).extend(func_call_vec); +// } +// // get func name from imports +// // TODO - git checkout before function call + + +// } +// } +// if !func_call_map.is_empty() { +// file_func_call_map.insert(file.to_string(), func_call_map); +// } +// } +// if file_func_call_map.is_empty() { +// return None; +// } +// return Some(file_func_call_map); +// } \ No newline at end of file diff --git a/vibi-dpu/src/graph/function_info.rs b/vibi-dpu/src/graph/function_info.rs deleted file mode 100644 index 9ccc0dcf..00000000 --- a/vibi-dpu/src/graph/function_info.rs +++ /dev/null @@ -1,286 +0,0 @@ -use serde::{Deserialize, Serialize}; - -use super::utils::{call_llm_api, get_specific_lines, read_file}; - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFunctionLineMapResponse { - functions: Option> -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct FunctionLineMap { - pub name: String, - pub line_start: i32, - pub line_end: i32, - pub inside: String -} - -impl FunctionLineMap { - pub fn new(name: &str, line_start: i32, line_end: i32, inside: &str) -> Self { - FunctionLineMap { - name: name.to_string(), - line_start, - line_end, - inside: inside.to_string(), - } - } -} - -pub async fn extract_function_lines(numbered_content: &str, file_name: &str) -> Option> { - let system_prompt_opt = read_file("/app/prompts/prompt_function_lines"); - if system_prompt_opt.is_none() { - log::error!("[mermaid_comment] Unable to read system prompt"); - return None; - } - let system_prompt = system_prompt_opt.expect("Empty system_prompt_opt"); - let mut flines = Vec::::new(); - // split numbered content and start for loop - // Split the numbered_content into lines - let lines: Vec<&str> = numbered_content.lines().collect(); - - // Determine the batch size - let batch_size = 30; - let mut prev_state: Option = None; - // Iterate over the lines in chunks of batch_size - for (chunk_idx, chunk) in lines.chunks(batch_size).enumerate() { - // create prompt - // call llm api - let mut prev_state_str = "{}".to_string(); - if prev_state.is_some() { - if let Ok(res_str) = serde_json::to_string(&prev_state) { - prev_state_str = res_str; - } - } - let prompt = format!( - "{}\n\n### User Message\nInput -\nprev_state = {}\n{}\n{}\n\nOutput -", - system_prompt, - &prev_state_str, - file_name, - chunk.join("\n") - ); - log::debug!("[extract_function_lines] prev_state_str = {}", &prev_state_str); - match call_llm_api(prompt).await { - None => { - log::error!("[mermaid_comment] Failed to call LLM API"); - return None; - } - Some(llm_response) => { - let mut unparsed_res = llm_response; - // parse response to FunctionLineMap - if unparsed_res.contains("```json") { - unparsed_res = extract_json_from_llm_response(&unparsed_res); - } - let flinemap_opt = clean_and_deserialize(&unparsed_res); - log::debug!("[extract_function_lines] flinemap_res {:?} ", &flinemap_opt); - if flinemap_opt.is_none() { - log::error!( - "[extract_function_lines] Unable to clean and deserialize llm response: {:?}", - &unparsed_res); - continue; - } - let flinemapresp: LlmFunctionLineMapResponse = flinemap_opt.expect("Uncaught error in flinemap_res"); - // add to vec - if flinemapresp.functions.is_some() { - let functions_arr = flinemapresp.functions.expect("Empty functions"); - if !functions_arr.is_empty() { - if let Some(func_obj) = functions_arr.last() { - let last_line_chunk = ((batch_size * (chunk_idx + 1)) - 1) as i32; - log::debug!( - "[extract_function_lines] last_line_chunk = {}, func_obj.line_end = {} ", - last_line_chunk, func_obj.line_end); - if func_obj.line_end == last_line_chunk { - prev_state = Some(func_obj.clone()); - } - } - flines.extend(functions_arr); - } - } - } - } - } - if flines.is_empty() { - log::error!("[extract_function_lines] No functions extracted"); - return None; - } - let parsed_flines = process_flinemap_response(&flines, lines.len()); - return Some(parsed_flines); -} - -fn clean_and_deserialize(json_str: &str) -> Option { - let mut cleaned_str = json_str.to_string(); - while !cleaned_str.is_empty() { - match serde_json::from_str(&cleaned_str) { - Ok(parsed) => return Some(parsed), - Err(e) if e.to_string().contains("trailing characters") => { - cleaned_str.pop(); // Remove the last character and try again - } - Err(e) => return None, - } - } - None -} - -fn extract_json_from_llm_response(llm_response: &str) -> String { - let start_delim = "```json"; - let end_delim = "```"; - // Find the starting index of the JSON part - let start_index = llm_response.find(start_delim).expect("find operation failed for ```json"); - // Find the ending index of the JSON part - let end_index = llm_response[start_index + start_delim.len()..].find(end_delim).expect("find for ``` failed"); - - // Extract the JSON part - llm_response[start_index + start_delim.len()..start_index + start_delim.len() + end_index].trim().to_string() -} - -fn process_flinemap_response(flines: &Vec, total_lines: usize) -> Vec { - log::debug!("[process_flinemap_response] flines = {:?}", &flines); - let mut resolved_flines: Vec = vec![]; - for flinemap in flines { - if flinemap.name == "unknown" { - if !resolved_flines.is_empty() { - let fline_len = resolved_flines.len(); - resolved_flines[fline_len - 1].line_end = flinemap.line_end; - continue; - } - } - resolved_flines.push(flinemap.to_owned()); - } - if let Some(last_flinemap) = resolved_flines.last() { - if last_flinemap.line_end == -1 { - let fline_len = resolved_flines.len(); - resolved_flines[fline_len - 1].line_end = total_lines as i32; - } - } - log::debug!("[process_flinemap_response] resolved_flines = {:?}", &resolved_flines); - return resolved_flines; -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmCalledFunctionResponse { - functions: Option> -} - -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct CalledFunction { - pub name: String, - pub line: usize -} - -pub async fn extract_function_calls(hunk_lines: &Vec<(usize, usize)>, numbered_content: &str, file_name: &str) -> Option> { - // extract hunk lines from numbered content - let user_prompt = get_specific_lines( - hunk_lines.to_owned(), numbered_content); - // prepare prompt and call llm api - let system_prompt_opt = read_file("/app/prompts/prompt_function_calls"); - if system_prompt_opt.is_none() { - log::error!("[extract_function_calls] Unable to read system prompt /app/prompt_function_calls"); - return None; - } - let system_prompt = system_prompt_opt.expect("Empty system_prompt_opt"); - let prompt = format!( - "{}\n\n### User Message\nInput -\n{}\n{}\n\nOutput -", - &system_prompt, - file_name, - &user_prompt - ); - match call_llm_api(prompt).await { - None => { - log::error!("[extract_function_calls] Failed to call LLM API"); - return None; - } - Some(llm_response) => { - // optional - paginate - let mut unparsed_res = llm_response; - // parse response to FunctionLineMap - if unparsed_res.contains("```json") { - unparsed_res = extract_json_from_llm_response(&unparsed_res); - } - let called_functions_res = serde_json::from_str(&unparsed_res); - if called_functions_res.is_err() { - let e = called_functions_res.expect_err("Empty error in called_functions_res"); - log::error!( - "[extract_function_calls] Unable to deserialize called_functions: {:?}", e); - return None; - } - let called_func_response: LlmCalledFunctionResponse = called_functions_res.expect("Uncaught error in called_functions_res"); - return called_func_response.functions; - } - } -} - -#[derive(Debug, Default, Deserialize, Clone)] -struct LlmCalledFunctionPathResponse { - functions: Option> -} - -#[derive(Debug, Default, Deserialize, Clone)] -pub struct CalledFunctionPath { - pub import_path: String, - pub function_name: String, - import_line: u32 -} - -pub async fn extract_function_import_path(called_funcs: &Vec, numbered_content: &str, file_name: &str) -> Option> { - let system_prompt_opt = read_file("/app/prompts/prompt_function_call_path"); - if system_prompt_opt.is_none() { - log::error!("[extract_function_calls] Unable to read system prompt /app/prompt_function_calls"); - return None; - } - let system_prompt = system_prompt_opt.expect("Empty system_prompt_opt"); - let mut user_prompt = String::new(); - // search in numbered content for called functions - let numbered_lines: Vec<&str> = numbered_content.lines().collect(); - for called_func in called_funcs { - // extract hunk lines from numbered content or get it as input - let first_occurence_line_opt = find_first_occurence(&numbered_lines, &called_func.name, called_func.line); - if first_occurence_line_opt.is_none() { - log::debug!("[extract_function_import_path] No first occurence found for: {}", &called_func.name); - continue; - } - let first_occurence_line = first_occurence_line_opt.expect("Empty first_occurence_line_opt"); - user_prompt.push_str(first_occurence_line.as_str()); - user_prompt.push_str("\n"); - user_prompt.push_str(numbered_lines[called_func.line]); - user_prompt.push_str("\n"); - } - // prepare prompt with hunk lines and occurences and call llm api - let prompt = format!( - "{}\n\n### User Message\nInput -\n{}\n{}\n\nOutput -", - &system_prompt, - file_name, - &user_prompt - ); - // extract CalledFunctionPath vec from responses and return - match call_llm_api(prompt).await { - None => { - log::error!("[extract_function_import_path] Failed to call LLM API"); - return None; - } - Some(llm_response) => { - let mut unparsed_res = llm_response; - // parse response to FunctionLineMap - if unparsed_res.contains("```json") { - unparsed_res = extract_json_from_llm_response(&unparsed_res); - } - let called_functions_res = serde_json::from_str(&unparsed_res); - if called_functions_res.is_err() { - let e = called_functions_res.expect_err("Empty error in called_functions_res"); - log::error!( - "[extract_function_calls] Unable to deserialize called_functions: {:?}", e); - return None; - } - let called_func_paths_res: LlmCalledFunctionPathResponse = called_functions_res.expect("Uncaught error in called_functions_res"); - return called_func_paths_res.functions; - } - } - // optional - paginate -} - -fn find_first_occurence(lines: &Vec<&str>, func_name: &str, hunk_line: usize) -> Option { - for (idx, line) in lines.iter().enumerate() { - if idx+1 != hunk_line && line.contains(func_name) { - return Some(line.to_owned().to_owned()); - } - } - return None; -} \ No newline at end of file diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs index 38299035..ce7b90cd 100644 --- a/vibi-dpu/src/graph/function_line_range.rs +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -5,7 +5,7 @@ use walkdir::WalkDir; use crate::{graph::utils::numbered_content, utils::review::Review}; -use super::utils::{all_code_files, call_llm_api, read_file}; +use super::{gitops::HunkDiffLines, utils::{all_code_files, call_llm_api, read_file}}; #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FuncDefInfo { @@ -21,6 +21,20 @@ impl PartialEq for FuncDefInfo { } } +impl FuncDefInfo { + pub fn name(&self) -> &String { + &self.name + } + + pub fn line_start(&self) -> usize { + self.line_start + } + + pub fn line_end(&self) -> usize { + self.line_end + } +} + #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionFileMap { file_name: String, @@ -37,6 +51,41 @@ impl FunctionFileMap { pub fn is_func_in_file(&self, func: &FuncDefInfo) -> bool { self.functions.contains(func) } + + pub fn func_def(&self, func_name: &str) -> Option<&FuncDefInfo> { + self.functions.iter().find(|f| f.name == func_name) + } + + pub fn func_at_line(&self, line_num: usize) -> Option<&FuncDefInfo> { + self.functions.iter().find( + |f| f.line_start <= line_num && line_num <= f.line_end) + } + + pub fn funcs_in_hunk(&self, hunk: &HunkDiffLines) -> Vec { + self.functions + .iter() + .filter(|func| { + // Check if the function's start or end line falls within the hunk's start and end line range + (func.line_start() >= hunk.start_line() && func.line_start() <= hunk.end_line()) || + (func.line_end() >= hunk.start_line() && func.line_end() <= hunk.end_line()) || + // Additionally check if the function completely spans over the hunk range + (func.line_start() <= hunk.start_line() && func.line_end() >= hunk.end_line()) + }).cloned() + .collect() + } + + pub fn funcs_for_lines(&self, lines: &Vec) -> HashMap { + let mut line_funcdef_map = HashMap::new(); + + for line in lines { + for func in &self.functions { + if func.line_start <= *line && *line <= func.line_end { + line_funcdef_map.entry(*line).or_insert(func.clone()); + } + } + } + return line_funcdef_map; + } } #[derive(Debug, Serialize, Default, Deserialize, Clone)] diff --git a/vibi-dpu/src/graph/gitops.rs b/vibi-dpu/src/graph/gitops.rs index 9ee2dc30..464d110e 100644 --- a/vibi-dpu/src/graph/gitops.rs +++ b/vibi-dpu/src/graph/gitops.rs @@ -1,16 +1,79 @@ -use std::{collections::HashMap, process::Command, str}; +use std::{collections::HashMap, path::PathBuf, process::Command, str::{self, FromStr}}; use crate::utils::{gitops::StatItem, review::Review}; -pub fn get_changed_files(small_files: &Vec, review: &Review) -> (HashMap>, HashMap>) { - // Replace this with actual logic to get changed files in the PR - let mut add_hunks_map = HashMap::>::new(); - let mut del_hunks_map = HashMap::>::new(); +#[derive(Debug, Default, Clone)] +pub struct HunkDiffLines { + start_line: usize, + end_line: usize, + content: Vec, +} + +impl HunkDiffLines { + pub fn start_line(&self) -> usize { + self.start_line + } + + pub fn end_line(&self) -> usize { + self.end_line + } +} + +#[derive(Debug, Default, Clone)] +pub struct FileHunks { + deleted_hunks: Vec, + added_hunks: Vec +} + +impl FileHunks { + pub fn deleted_hunks(&self) -> &Vec { + &self.deleted_hunks + } + + pub fn added_hunks(&self) -> &Vec { + &self.added_hunks + } +} + +#[derive(Debug, Default, Clone)] +pub struct HunkDiffMap { + file_line_map: HashMap, +} + +impl HunkDiffMap { + pub fn file_line_map(&self) -> &HashMap { + &self.file_line_map + } + + pub fn all_files(&self) -> Vec<&String> { + self.file_line_map.keys().collect::>() + } + + pub fn all_files_pathbuf(&self) -> Vec { + self.file_line_map.keys() + .filter_map(|s| { + // Try to convert each &str to a PathBuf + let s_pathbuf_res = PathBuf::from_str(s); + match s_pathbuf_res { + Ok(pathbuf) => Some(pathbuf), + Err(_) => None, + } + }) + .collect::>() + } + + pub fn file_hunks(&self, filename: &str) -> Option<&FileHunks> { + self.file_line_map.get(filename) + } +} + +pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> HunkDiffMap { + let mut file_hunk_map = HunkDiffMap{file_line_map: HashMap::new()}; let prev_commit = review.base_head_commit(); let curr_commit = review.pr_head_commit(); let clone_dir = review.clone_dir(); - for item in small_files { + for item in diff_files { let filepath = item.filepath.as_str(); let commit_range = format!("{}...{}", prev_commit, curr_commit); log::debug!("[extract_hunks] | clone_dir = {:?}, filepath = {:?}", clone_dir, filepath); @@ -37,11 +100,39 @@ pub fn get_changed_files(small_files: &Vec, review: &Review) -> (HashM let diffstr = diffstr_res.expect("Uncaught error in diffstr_res"); log::debug!("[extract_hunks] diffstr = {}", &diffstr); - let mut add_hunks = Vec::new(); - let mut del_hunks = Vec::new(); + let mut current_add_content = Vec::new(); + let mut current_del_content = Vec::new(); + let mut current_add_start = 0; + let mut current_del_start = 0; + let mut current_add_end = 0; + let mut current_del_end = 0; + let mut in_add_hunk = false; + let mut in_del_hunk = false; + let mut file_hunks = FileHunks {deleted_hunks: Vec::new(), added_hunks: Vec::new()}; for line in diffstr.lines() { if line.starts_with("@@") { + // Save previous hunks if any + if in_add_hunk { + file_hunks.added_hunks.push(HunkDiffLines { + start_line: current_add_start, + end_line: current_add_end, + content: current_add_content.clone(), + }); + } + if in_del_hunk { + file_hunks.deleted_hunks.push(HunkDiffLines { + start_line: current_del_start, + end_line: current_del_end, + content: current_del_content.clone(), + }); + } + // Reset states for next hunk + in_add_hunk = false; + in_del_hunk = false; + current_add_content.clear(); + current_del_content.clear(); + let parts: Vec<&str> = line.split_whitespace().collect(); if parts.len() > 2 { let del_hunk = parts[1]; @@ -49,33 +140,54 @@ pub fn get_changed_files(small_files: &Vec, review: &Review) -> (HashM if del_hunk.starts_with('-') { if let Some((start, len)) = parse_hunk_range(del_hunk) { - let end = start + len - 1; - del_hunks.push((start, end)); + current_del_start = start; + current_del_end = start + len - 1; + in_del_hunk = true; } } if add_hunk.starts_with('+') { if let Some((start, len)) = parse_hunk_range(add_hunk) { - let end = start + len - 1; - add_hunks.push((start, end)); + current_add_start = start; + current_add_end = start + len - 1; + in_add_hunk = true; } } } + } else if line.starts_with('-') { + if in_del_hunk { + current_del_content.push(line[1..].to_string()); + } + } else if line.starts_with('+') { + if in_add_hunk { + current_add_content.push(line[1..].to_string()); + } } } - if !add_hunks.is_empty() { - add_hunks_map.insert(filepath.to_string(), add_hunks); + // Push the last hunks + if in_add_hunk { + file_hunks.added_hunks.push(HunkDiffLines { + start_line: current_add_start, + end_line: current_add_end, + content: current_add_content.clone(), + }); } - if !del_hunks.is_empty() { - del_hunks_map.insert(filepath.to_string(), del_hunks); + if in_del_hunk { + file_hunks.deleted_hunks.push(HunkDiffLines { + start_line: current_del_start, + end_line: current_del_end, + content: current_del_content.clone(), + }); } + + file_hunk_map.file_line_map.insert(filepath.to_string(), file_hunks); } - (del_hunks_map, add_hunks_map) + + return file_hunk_map; } fn parse_hunk_range(hunk: &str) -> Option<(usize, usize)> { - let hunk = hunk.trim_start_matches(&['-', '+'][..]); let parts: Vec<&str> = hunk.split(',').collect(); if parts.len() == 1 { @@ -91,4 +203,4 @@ fn parse_hunk_range(hunk: &str) -> Option<(usize, usize)> { } } None -} \ No newline at end of file +} diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index 167ec78f..8e1387dd 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -1,19 +1,324 @@ -async fn graph_edges() { - let incoming_edges = incoming_edges().await; - let outgoing_edges = outgoing_edges().await; - let graph = edge_nodes().await; +use std::{path::{Path, PathBuf}, str::FromStr}; +use crate::utils::{gitops::git_checkout_commit, review::Review}; + +use super::{elements::MermaidGraphElements, file_imports::{AllFileImportInfo, ImportPath}, function_call::function_calls_in_file, function_line_range::{generate_function_map, FuncDefInfo, FunctionFileMap}, graph_info::DiffGraph, utils::match_overlap}; + +pub async fn graph_edges(review: &Review, all_import_info: &AllFileImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + incoming_edges(review, all_import_info, diff_graph, graph_elems).await; + outgoing_edges(all_import_info, diff_graph, graph_elems).await; +} + +async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + for (dest_filename, func_defs) in diff_graph.diff_func_defs() { + for dest_func in func_defs.added_func_defs() { + git_checkout_commit(review, review.pr_head_commit()); + // search in diff graph + for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { + let file_imports = file_func_defs.all_import_paths(); + for file_import in file_imports { + // search for correct import + if match_import_condition(dest_filename, &file_import, dest_func) { + // find func call + let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // call func in that takes vec of lines and returns funcdefs + let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + for (line_num, source_func_def) in source_func_defs { + if source_func_def != dest_func.to_owned() { + graph_elems.add_edge("green", + line_num.to_owned(), + &source_func_def.name(), + &dest_func.name(), + &source_filename, + dest_filename); + } + } + } + } + } + } + git_checkout_commit(review, review.base_head_commit()); + // search in full graph + for (source_filename, file_func_defs) in all_import_info.file_import_map() { + let file_imports = file_func_defs.all_import_paths(); + for file_import in file_imports { + // search for correct import + if match_import_condition(dest_filename, &file_import, dest_func) { + // if found, create edge + let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // call func in that takes vec of lines and returns funcdefs + let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + for (line_num, source_func_def) in source_func_defs { + if source_func_def != dest_func.to_owned() { + graph_elems.add_edge("green", + line_num.to_owned(), + &source_func_def.name(), + &dest_func.name(), + &source_filename, + dest_filename); + } + } + } + } + } + } + } + for dest_func in func_defs.deleted_func_defs() { + // search in diff graph + for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { + let file_imports = file_func_defs.all_import_paths(); + for file_import in file_imports { + // search for correct import + if match_import_condition(dest_filename, &file_import, dest_func) { + // find func call + git_checkout_commit(review, review.pr_head_commit()); + let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // call func in that takes vec of lines and returns funcdefs + let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + for (line_num, source_func_def) in source_func_defs { + if source_func_def != dest_func.to_owned() { + graph_elems.add_edge("red", + line_num.to_owned(), + &source_func_def.name(), + &dest_func.name(), + &source_filename, + dest_filename); + } + } + } + } + } + } + // search in full graph + for (source_filename, file_func_defs) in all_import_info.file_import_map() { + let file_imports = file_func_defs.all_import_paths(); + for file_import in file_imports { + // search for correct import + if match_import_condition(dest_filename, &file_import, dest_func) { + // if found, create edge + let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // call func in that takes vec of lines and returns funcdefs + let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + for (line_num, source_func_def) in source_func_defs { + if source_func_def != dest_func.to_owned() { + graph_elems.add_edge("red", + line_num.to_owned(), + &source_func_def.name(), + &dest_func.name(), + &source_filename, + dest_filename); + } + } + } + } + } + } + } + } +} + +// async fn generate_incoming_edges(modified_funcs: &HashMap>, full_graph: &GraphInfo, diff_graph: &GraphInfo, color: &str, graph_elems: &mut MermaidGraphElements) { +// for (dest_filename, dest_func_info_vec) in modified_funcs.iter() { +// for dest_func_info in dest_func_info_vec { +// search_imports_in_graph(&dest_filename, dest_func_info, +// full_graph, color, graph_elems).await; +// search_imports_in_graph(&dest_filename, dest_func_info, +// diff_graph, color, graph_elems).await; +// } +// } +// } + +// async fn search_imports_in_graph(dest_filename: &str, dest_func_info: &FuncDefInfo, search_graph: &GraphInfo, color: &str, graph_elems: &mut MermaidGraphElements) { +// for source_filename in search_graph.import_info().files() { +// if let Some(source_file_imports) = search_graph.import_info().file_import_info(source_filename) { +// let file_imports = source_file_imports.all_import_paths(); +// for import_obj in file_imports { +// if match_import_condition(dest_filename, &import_obj, dest_func_info) { +// if let Some(source_func_file_map) = search_graph.function_info().functions_in_file(source_filename) { +// add_edge_for_file(source_filename, source_func_file_map, dest_filename, dest_func_info, color, graph_elems).await; +// } +// } +// } +// } +// } +// } + +fn match_import_condition(dest_filename: &str, import_obj: &ImportPath, dest_func_info: &FuncDefInfo) -> bool { + match_overlap( + &dest_filename, + &import_obj.import_path(), + 0.5) + && match_overlap(&dest_func_info.name(), + &import_obj.imported(), + 0.5) } -async fn incoming_edges() { - // find incoming edges from full_graph to diff_graph - // find incoming green edges from diff_graph to diff_graph +async fn add_edge_for_file(source_filename: &str, source_func_def: &FuncDefInfo, dest_filename: &str, dest_func_info: &FuncDefInfo, color: &str, graph_elems: &mut MermaidGraphElements) { + // TODO FIXME - do git commit checkout + let filepath = Path::new(source_filename); + let file_pathbuf = filepath.to_path_buf(); + if let Some(func_call_chunk) = + function_calls_in_file(&file_pathbuf, &dest_func_info.name()).await + { + for source_chunk_call in func_call_chunk { + for source_func_line in source_chunk_call.function_calls() { + if source_func_def != dest_func_info { + graph_elems.add_edge(color, + source_func_line.to_owned(), + &source_func_def.name(), + &dest_func_info.name(), + &source_filename, + dest_filename); + } + } + } + } } -async fn outgoing_edges() { - // find outgoing edges from diff_graph to full_graph - // find outgoing edges from diff_graph to diff_graph +async fn outgoing_edges(all_import_info: &AllFileImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + // TODO - git checkout + for (source_filename, func_calls) in diff_graph.diff_func_calls() { + for source_func_call in func_calls.added_calls() { + let dest_filename = source_func_call.import_info().import_path(); + let func_name = source_func_call.import_info().imported(); + let lines = source_func_call.call_info().iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // send this file for getting func defs + // search in diff graph + let diff_file_funcdefs = diff_graph.all_file_func_defs(); + // identify this particular func + if let Some(func_defs) = diff_file_funcdefs.functions_in_file(dest_filename) { + let source_func_defs = func_defs.funcs_for_lines(&lines); + for dest_func_def in func_defs.functions() { + if match_import_condition(dest_filename, source_func_call.import_info(), dest_func_def) { + // add edge + for (line_num, source_func_def) in &source_func_defs { + graph_elems.add_edge("green", + line_num.to_owned(), + source_func_def.name(), + dest_func_def.name(), + source_filename, + dest_filename); + } + } + } + } + // search in full graph + let dest_filepath = PathBuf::from_str(dest_filename).expect("Unable to get path"); + if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath]).await { + // identify this particular func + if let Some(func_defs) = all_file_funcdefs.functions_in_file(dest_filename) { + let source_func_defs = func_defs.funcs_for_lines(&lines); + for dest_func_def in func_defs.functions() { + if match_import_condition(dest_filename, source_func_call.import_info(), dest_func_def) { + // add edge + for (line_num, source_func_def) in &source_func_defs { + graph_elems.add_edge("green", + line_num.to_owned(), + source_func_def.name(), + dest_func_def.name(), + source_filename, + dest_filename); + } + } + } + } + } + } + // do same for deleted_calls + for source_func_call in func_calls.deleted_calls() { + let dest_filename = source_func_call.import_info().import_path(); + let func_name = source_func_call.import_info().imported(); + let diff_file_funcdefs = diff_graph.all_file_func_defs(); + let lines = source_func_call.call_info().iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // identify this particular func + if let Some(func_defs) = diff_file_funcdefs.functions_in_file(dest_filename) { + let source_func_defs = func_defs.funcs_for_lines(&lines); + for dest_func_def in func_defs.functions() { + if match_import_condition(dest_filename, source_func_call.import_info(), dest_func_def) { + // add edge + for (line_num, source_func_def) in &source_func_defs { + graph_elems.add_edge("red", + line_num.to_owned(), + source_func_def.name(), + dest_func_def.name(), + source_filename, + dest_filename); + } + // add_edge_for_file(source_filename, _, + // dest_filename, dest_func_def, "red", graph_elems).await; + } + } + } + // send this file for getting func defs + let dest_filepath = PathBuf::from_str(dest_filename).expect("Unable to get path"); + if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath]).await { + // identify this particular func + if let Some(func_defs) = all_file_funcdefs.functions_in_file(dest_filename) { + let source_func_defs = func_defs.funcs_for_lines(&lines); + for dest_func_def in func_defs.functions() { + if match_import_condition(dest_filename, source_func_call.import_info(), dest_func_def) { + // add edge + for (line_num, source_func_def) in &source_func_defs { + graph_elems.add_edge("red", + line_num.to_owned(), + source_func_def.name(), + dest_func_def.name(), + source_filename, + dest_filename); + } + // add_edge_for_file(source_filename, _, + // dest_filename, dest_func_def, "red", graph_elems).await; + } + } + } + } + } + } } +// async fn generate_outgoing_edges(modified_imports: &HashMap>, full_graph: &GraphInfo, diff_graph: &GraphInfo, color: &str, graph_elems: &mut MermaidGraphElements) { +// for (dest_filename, dest_import_info) in modified_imports.iter() { +// let filepath = Path::new(dest_filename); +// let file_pathbuf = filepath.to_path_buf(); +// for dest_import in dest_import_info { +// search_funcs_in_graph(full_graph, dest_import, &file_pathbuf, color, dest_filename, graph_elems).await; +// // TODO FIXME - think about similar edges being searched from both full and diff graph. How to avoid adding them repeatedly? +// search_funcs_in_graph(diff_graph, dest_import, &file_pathbuf, color, dest_filename, graph_elems).await; +// } +// } +// } + +// async fn search_funcs_in_graph(search_graph: &GraphInfo, dest_import: &ImportPath, file_pathbuf: &PathBuf, color: &str, dest_file: &str, graph_elems: &mut MermaidGraphElements) { +// for source_file in search_graph.function_info().all_files() { +// if match_overlap(&source_file, &dest_import.imported(), 0.5) { +// if let Some(source_file_func_calls) = +// function_calls_in_file(&file_pathbuf, &dest_import.imported()).await +// { +// if let Some(func_file_map) = +// search_graph.function_info().functions_in_file(source_file) +// { +// for func_call_chunk in source_file_func_calls { +// for source_file_line in func_call_chunk.function_calls() { +// if let Some(source_func_def) = func_file_map.func_at_line(source_file_line.to_owned()) { +// if source_func_def.name() != dest_import.imported() { +// graph_elems.add_edge(color, source_file_line.to_owned(), &source_func_def.name(), &dest_import.imported(), source_file, dest_file) +// } +// } +// } +// } +// } +// } +// } +// } +// } + async fn edge_nodes() { // render all edges and their nodes } \ No newline at end of file diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs index 8d98c6b7..49a87f69 100644 --- a/vibi-dpu/src/graph/graph_info.rs +++ b/vibi-dpu/src/graph/graph_info.rs @@ -2,206 +2,384 @@ use std::{collections::HashMap, path::PathBuf}; use serde::{Deserialize, Serialize}; -use crate::{db::graph_info::{get_graph_info_from_db, save_graph_info_to_db}, graph::{file_imports::get_import_lines, function_line_range::generate_function_map, utils::all_code_files}, utils::gitops::StatItem}; +use crate::{db::graph_info::{get_import_info_from_db, save_import_info_to_db}, graph::{file_imports::get_import_lines, function_line_range::generate_function_map, utils::all_code_files}, utils::{gitops::StatItem, review::Review}}; -use super::{file_imports::{AllFileImportInfo, ImportPath}, function_line_range::{AllFileFunctions, FuncDefInfo}, utils::source_diff_files}; +use super::{file_imports::{AllFileImportInfo, ImportPath}, function_call::{function_calls_in_file, FunctionCallChunk}, function_line_range::{AllFileFunctions, FuncDefInfo}, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::source_diff_files}; -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct DiffInfo { - added_funcs: Option>>, // key is filename - deleted_funcs: Option>>, // key is filename - added_imports: Option>>, // key is filename - deleted_imports: Option>> // key is filename -} +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct DiffInfo { +// added_funcs: Option>>, // key is filename +// deleted_funcs: Option>>, // key is filename +// added_imports: Option>>, // key is filename +// deleted_imports: Option>> // key is filename +// } + +// impl DiffInfo { +// pub fn added_funcs(&self) -> &Option>> { +// &self.added_funcs +// } + +// pub fn deleted_funcs(&self) -> &Option>> { +// &self.deleted_funcs +// } + +// pub fn added_imports(&self) -> &Option>> { +// &self.added_imports +// } + +// pub fn deleted_imports(&self) -> &Option>> { +// &self.deleted_imports +// } +// } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct GraphInfo { - function_info: AllFileFunctions, - import_info: AllFileImportInfo +// async fn generate_graph_info(source_file_paths: &Vec) -> Option { +// // let function_map_opt = generate_function_map(source_file_paths).await; +// // if function_map_opt.is_none() { +// // log::error!("[generate_graph_info] Unable to generate function map"); +// // return None; +// // } +// // let function_map = function_map_opt.expect("Empty function_map_opt"); +// // log::debug!("[generate_graph_info] func map = {:?}", &function_map); +// let all_file_import_info_opt = get_import_lines(source_file_paths).await; +// if all_file_import_info_opt.is_none() { +// log::error!("[generate_graph_info] Unable to get import info for source files: {:#?}", source_file_paths); +// return None; +// } +// let all_file_import_info = all_file_import_info_opt.expect("Empty import_lines_opt"); +// let graph_info = GraphInfo { function_info: function_map, +// import_info: all_file_import_info }; +// return Some(graph_info); +// } + +// pub async fn generate_full_graph(repo_dir: &str, review_key: &str, commit_id: &str) -> Option { +// // check for graph db +// if let Some(graph_info) = get_import_info_from_db(review_key, commit_id) { +// return Some(graph_info); +// } +// let repo_code_files_opt = all_code_files(repo_dir); +// if repo_code_files_opt.is_none() { +// log::error!("[generate_full_graph] Unable to get file paths: {}", repo_dir); +// return None; +// } +// let repo_code_files = repo_code_files_opt.expect("Empty repo_code_files_opt"); +// let graph_info_opt = generate_graph_info(&repo_code_files).await; +// if graph_info_opt.is_none() { +// log::error!("[generate_full_graph] Unable to generate full graph for commit: {}", commit_id); +// return None; +// } +// let graph_info = graph_info_opt.expect("Empty graph_info_opt"); +// // save all this to db +// save_import_info_to_db(review_key, commit_id, &graph_info); +// return Some(graph_info); +// } + +#[derive(Debug, Default, Clone)] +pub struct DiffFuncDefs { + added_func_defs: Vec, + deleted_func_defs: Vec } -impl GraphInfo { - pub fn function_info(&self) -> &AllFileFunctions { - &self.function_info +impl DiffFuncDefs { + pub fn extend_added_funcdefs(&mut self, add_funcdefs: Vec) { + self.added_func_defs.extend(add_funcdefs); } - pub fn import_info(&self) -> &AllFileImportInfo { - &self.import_info + pub fn extend_deleted_funcdefs(&mut self, del_funcdefs: Vec) { + self.deleted_func_defs.extend(del_funcdefs); } + + pub fn added_func_defs(&self) -> &Vec { + &self.added_func_defs + } + + pub fn deleted_func_defs(&self) -> &Vec { + &self.deleted_func_defs + } +} +#[derive(Debug, Default, Clone)] +pub struct FuncCall { + import_info: ImportPath, + call_info: Vec } -async fn generate_graph_info(source_file_paths: &Vec) -> Option { - let function_map_opt = generate_function_map(source_file_paths).await; - if function_map_opt.is_none() { - log::error!("[generate_graph_info] Unable to generate function map"); - return None; +impl FuncCall { + pub fn import_info(&self) -> &ImportPath { + &self.import_info } - let function_map = function_map_opt.expect("Empty function_map_opt"); - log::debug!("[generate_graph_info] func map = {:?}", &function_map); - let all_file_import_info_opt = get_import_lines(source_file_paths).await; - if all_file_import_info_opt.is_none() { - log::error!("[generate_graph_info] Unable to get import info for source files: {:#?}", source_file_paths); - return None; + pub fn call_info(&self) -> &Vec { + &self.call_info } - let all_file_import_info = all_file_import_info_opt.expect("Empty import_lines_opt"); - let graph_info = GraphInfo { function_info: function_map, - import_info: all_file_import_info }; - return Some(graph_info); } -pub async fn generate_full_graph(repo_dir: &str, review_key: &str, commit_id: &str) -> Option { - // check for graph db - if let Some(graph_info) = get_graph_info_from_db(review_key, commit_id) { - return Some(graph_info); +#[derive(Debug, Default, Clone)] +pub struct DiffFuncCall { + added_calls: Vec, + deleted_calls: Vec +} + +impl DiffFuncCall { + pub fn add_added_calls(&mut self, add_calls: FuncCall) { + self.added_calls.push(add_calls); } - let repo_code_files_opt = all_code_files(repo_dir); - if repo_code_files_opt.is_none() { - log::error!("[generate_full_graph] Unable to get file paths: {}", repo_dir); - return None; + + pub fn add_deleted_calls(&mut self, del_calls: FuncCall) { + self.deleted_calls.push(del_calls); } - let repo_code_files = repo_code_files_opt.expect("Empty repo_code_files_opt"); - let graph_info_opt = generate_graph_info(&repo_code_files).await; - if graph_info_opt.is_none() { - log::error!("[generate_full_graph] Unable to generate full graph for commit: {}", commit_id); - return None; + + pub fn added_calls(&self) -> &Vec { + &self.added_calls + } + + pub fn deleted_calls(&self) -> &Vec { + &self.deleted_calls } - let graph_info = graph_info_opt.expect("Empty graph_info_opt"); - // save all this to db - save_graph_info_to_db(review_key, commit_id, &graph_info); - return Some(graph_info); } -pub async fn generate_diff_graph(diff_files: &Vec) -> (Option, Option>) { - let (diff_code_files_opt, deleted_files_opt) = source_diff_files(diff_files); - if diff_code_files_opt.is_none() { - log::error!("[generate_diff_graph] Unable to get file paths for: {:#?}", diff_files); - return (None, deleted_files_opt); +#[derive(Debug, Default, Clone)] +pub struct DiffGraph { + diff_files_func_defs: AllFileFunctions, + diff_files_imports: AllFileImportInfo, + diff_func_defs: HashMap, + diff_func_calls: HashMap +} + +impl DiffGraph { + pub fn add_func_def(&mut self, filename: String, diff_func_defs: DiffFuncDefs) { + self.diff_func_defs.insert(filename, diff_func_defs); } - let diff_code_files = diff_code_files_opt.expect("Empty diff_code_files_opt"); - let graph_info_opt = generate_graph_info(&diff_code_files).await; - if graph_info_opt.is_none() { - log::error!("[generate_diff_graph] Unable to generate diff graph"); - return (None, deleted_files_opt); + + pub fn add_diff_func_calls(&mut self, filename: String, diff_func_calls: DiffFuncCall) { + self.diff_func_calls.insert(filename, diff_func_calls); } - let graph_info = graph_info_opt.expect("Empty graph_info_opt"); - return (Some(graph_info), deleted_files_opt); -} -fn added_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { - let mut added_funcs = HashMap::>::new(); - for filename in diff_graph.function_info().all_files() { - let func_map_opt = full_graph.function_info().functions_in_file(filename); - if func_map_opt.is_none() { - if let Some(diff_func_map) = diff_graph.function_info().functions_in_file(filename) { - let funcs_vec = diff_func_map.functions().to_owned(); - added_funcs.entry(filename.to_string()) - .or_insert_with(Vec::new) - .extend(funcs_vec); - } - } else { - let full_func_map = func_map_opt.expect("Empty func_map_opt"); - if let Some(diff_func_map) = diff_graph.function_info().functions_in_file(filename) { - for func in diff_func_map.functions() { - if !full_func_map.is_func_in_file(func) { - added_funcs.entry(filename.to_string()) - .or_insert_with(Vec::new) - .push(func.to_owned()); - } - } - } - } + pub fn all_file_func_defs(&self) -> &AllFileFunctions { + &self.diff_files_func_defs } - if added_funcs.is_empty() { - return None; + + pub fn all_file_imports(&self) -> &AllFileImportInfo { + &self.diff_files_imports } - return Some(added_funcs); -} -fn deleted_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { - let mut deleted_funcs = HashMap::>::new(); - for filename in diff_graph.function_info().all_files() { - // TODO - full file deleted? - let funcs_opt = full_graph.function_info().functions_in_file(filename); - if funcs_opt.is_none() { - // file added - } - let full_funcs = funcs_opt.expect("Empty funcs_opt"); - let diff_funcs = diff_graph.function_info().functions_in_file(filename).expect("Empty diff_funcs"); - for func in full_funcs.functions() { - if diff_funcs.is_func_in_file(func) { - deleted_funcs.entry(filename.to_string()) - .or_insert_with(Vec::new) - .push(func.to_owned()); - } - } + pub fn diff_func_defs(&self) -> &HashMap { + &self.diff_func_defs } - if deleted_funcs.is_empty() { - return None; + + pub fn diff_func_calls(&self) -> &HashMap { + &self.diff_func_calls } - return Some(deleted_funcs) } -fn added_imports_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { - let mut added_imports = HashMap::>::new(); - for filename in diff_graph.import_info().files() { - let diff_imports = diff_graph - .import_info() - .file_import_info(filename).expect("Empty diff imports"); - let full_imports_opt = full_graph - .import_info().file_import_info(filename); - if full_imports_opt.is_none() { - added_imports.entry(filename.to_string()) - .or_insert_with(Vec::new) - .extend(diff_imports.all_import_paths()); - } else { - for import_path in diff_imports.all_import_paths() { - if !full_graph.import_info().is_import_in_file(filename, &import_path) { - added_imports.entry(filename.to_string()) - .or_insert_with(Vec::new) - .push(import_path); - } - } - } - } - if added_imports.is_empty() { +pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> Option { + let diff_code_files_opt = source_diff_files(diff_files); + if diff_code_files_opt.is_none() { + log::debug!("[generate_diff_graph] No relevant source diff files in: {:#?}", diff_files); return None; } - return Some(added_imports); + let diff_code_files = diff_code_files_opt.expect("Empty diff_code_files_opt"); + let hunk_diff_map = get_changed_hunk_lines(&diff_code_files, review); + let diff_graph_opt = process_hunk_diff(&hunk_diff_map).await; + return diff_graph_opt; + // let diff_code_files_pathbuf: Vec = diff_code_files + // .iter() + // .filter_map(|s| { + // // Try to convert each &str to a PathBuf + // let s_pathbuf_res = PathBuf::from_str(&s.filepath); + // match s_pathbuf_res { + // Ok(pathbuf) => Some(pathbuf), + // Err(_) => None, + // } + // }) + // .collect(); + // let graph_info_opt = generate_graph_info(&diff_code_files_pathbuf).await; + // if graph_info_opt.is_none() { + // log::error!("[generate_diff_graph] Unable to generate diff graph"); + // return (None, deleted_files_opt); + // } + // let graph_info = graph_info_opt.expect("Empty graph_info_opt"); + // // return (Some(graph_info), deleted_files_opt); + // return None; } -fn deleted_imports_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { - let mut deleted_imports = HashMap::>::new(); - // TODO - file deleted - for filename in diff_graph.import_info().files() { - let full_imports_opt = full_graph.import_info().file_import_info(filename); - if full_imports_opt.is_none() { - // file added - } - let full_imports = full_imports_opt.expect("Empty full_imports_opt"); - for import_path in full_imports.all_import_paths() { - if !diff_graph.import_info().is_import_in_file(filename, &import_path) { - deleted_imports.entry(filename.to_string()) - .or_insert_with(Vec::new) - .push(import_path); - } - } +async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap) -> Option { + let all_files = hunk_diff_map.all_files_pathbuf(); + let all_file_func_defs_opt = generate_function_map(&all_files).await; + let all_file_imports_opt = get_import_lines(&all_files).await; + // TODO FIXME - opt logic + if all_file_func_defs_opt.is_none() { + log::debug!("[process_hunk_diff] Unable to generate func definitions diff map"); + return None; } - if deleted_imports.is_empty() { + if all_file_imports_opt.is_none() { + log::debug!("[process_hunk_diff] Unable to generate func imports diff map"); return None; } - return Some(deleted_imports); + let all_file_func_defs = all_file_func_defs_opt.expect("Empty all_file_func_defs_opt)"); + let all_file_imports = all_file_imports_opt.expect("Empty all_file_imports_opt"); + let mut diff_graph = DiffGraph { + diff_files_func_defs: all_file_func_defs, + diff_files_imports: all_file_imports, + diff_func_defs: HashMap::new(), + diff_func_calls: HashMap::new(), + }; + for filepath in all_files { + let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); + let mut diff_func_defs = DiffFuncDefs { + added_func_defs: Vec::new(), deleted_func_defs: Vec::new()}; + let mut diff_func_calls = DiffFuncCall { + added_calls: Vec::new(), deleted_calls: Vec::new()}; + if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + for hunk_diff in file_line_map.added_hunks() { + if let Some(funcs_map) = diff_graph.all_file_func_defs().functions_in_file(filename) { + // find func_defs for files in hunks + let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); + if !funcs_def_vec.is_empty() { + // add func def vec to something with file as key + diff_func_defs.extend_added_funcdefs(funcs_def_vec); + } + } + } + for hunk_diff in file_line_map.deleted_hunks() { + if let Some(funcs_map) = diff_graph.all_file_func_defs().functions_in_file(filename) { + // find func_defs for files in hunks + let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); + if !funcs_def_vec.is_empty() { + // add func def vec to something with file as key + diff_func_defs.extend_deleted_funcdefs(funcs_def_vec); + } + } + } + // find func call in hunks for each import + if let Some(imports_info) = diff_graph.all_file_imports().file_import_info(filename) { + for import_info in imports_info.all_import_paths() { + if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { + // add these func calls to something with file as key + let func_call = FuncCall{ import_info, call_info: func_calls }; + diff_func_calls.add_added_calls(func_call); + } + } + } + } + diff_graph.add_func_def(filename.to_string(), diff_func_defs); + diff_graph.add_diff_func_calls(filename.to_string(), diff_func_calls); + } + return Some(diff_graph); } -pub fn generate_diff_info(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> DiffInfo { +// fn added_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { +// let mut added_funcs = HashMap::>::new(); +// for filename in diff_graph.function_info().all_files() { +// let func_map_opt = full_graph.function_info().functions_in_file(filename); +// if func_map_opt.is_none() { +// if let Some(diff_func_map) = diff_graph.function_info().functions_in_file(filename) { +// let funcs_vec = diff_func_map.functions().to_owned(); +// added_funcs.entry(filename.to_string()) +// .or_insert_with(Vec::new) +// .extend(funcs_vec); +// } +// } else { +// let full_func_map = func_map_opt.expect("Empty func_map_opt"); +// if let Some(diff_func_map) = diff_graph.function_info().functions_in_file(filename) { +// for func in diff_func_map.functions() { +// if !full_func_map.is_func_in_file(func) { +// added_funcs.entry(filename.to_string()) +// .or_insert_with(Vec::new) +// .push(func.to_owned()); +// } +// } +// } +// } +// } +// if added_funcs.is_empty() { +// return None; +// } +// return Some(added_funcs); +// } + +// fn deleted_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { +// let mut deleted_funcs = HashMap::>::new(); +// for filename in diff_graph.function_info().all_files() { +// // TODO - full file deleted? +// let funcs_opt = full_graph.function_info().functions_in_file(filename); +// if funcs_opt.is_none() { +// // file added +// } +// let full_funcs = funcs_opt.expect("Empty funcs_opt"); +// let diff_funcs = diff_graph.function_info().functions_in_file(filename).expect("Empty diff_funcs"); +// for func in full_funcs.functions() { +// if diff_funcs.is_func_in_file(func) { +// deleted_funcs.entry(filename.to_string()) +// .or_insert_with(Vec::new) +// .push(func.to_owned()); +// } +// } +// } +// if deleted_funcs.is_empty() { +// return None; +// } +// return Some(deleted_funcs) +// } + +// fn added_imports_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { +// let mut added_imports = HashMap::>::new(); +// for filename in diff_graph.import_info().files() { +// let diff_imports = diff_graph +// .import_info() +// .file_import_info(filename).expect("Empty diff imports"); +// let full_imports_opt = full_graph +// .import_info().file_import_info(filename); +// if full_imports_opt.is_none() { +// added_imports.entry(filename.to_string()) +// .or_insert_with(Vec::new) +// .extend(diff_imports.all_import_paths()); +// } else { +// for import_path in diff_imports.all_import_paths() { +// if !full_graph.import_info().is_import_in_file(filename, &import_path) { +// added_imports.entry(filename.to_string()) +// .or_insert_with(Vec::new) +// .push(import_path); +// } +// } +// } +// } +// if added_imports.is_empty() { +// return None; +// } +// return Some(added_imports); +// } + +// fn deleted_imports_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { +// let mut deleted_imports = HashMap::>::new(); +// // TODO - file deleted +// for filename in diff_graph.import_info().files() { +// let full_imports_opt = full_graph.import_info().file_import_info(filename); +// if full_imports_opt.is_none() { +// // file added +// } +// let full_imports = full_imports_opt.expect("Empty full_imports_opt"); +// for import_path in full_imports.all_import_paths() { +// if !diff_graph.import_info().is_import_in_file(filename, &import_path) { +// deleted_imports.entry(filename.to_string()) +// .or_insert_with(Vec::new) +// .push(import_path); +// } +// } +// } +// if deleted_imports.is_empty() { +// return None; +// } +// return Some(deleted_imports); +// } + +// pub fn generate_diff_info(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> DiffInfo { // Get added funcs and imports - let added_funcs_opt = added_functions_diff(full_graph, diff_graph); - let deleted_funcs_opt = deleted_functions_diff(full_graph, diff_graph); - let added_imports_opt = added_imports_diff(full_graph, diff_graph); - let deleted_imports_opt = deleted_imports_diff(full_graph, diff_graph); - return DiffInfo { - added_funcs: added_funcs_opt, - deleted_funcs: deleted_funcs_opt, - added_imports: added_imports_opt, - deleted_imports: deleted_imports_opt - }; -} \ No newline at end of file + // let added_funcs_opt = added_functions_diff(full_graph, diff_graph); + // let deleted_funcs_opt = deleted_functions_diff(full_graph, diff_graph); + // let added_imports_opt = added_imports_diff(full_graph, diff_graph); + // let deleted_imports_opt = deleted_imports_diff(full_graph, diff_graph); + // return DiffInfo { + // added_funcs: added_funcs_opt, + // deleted_funcs: deleted_funcs_opt, + // added_imports: added_imports_opt, + // deleted_imports: deleted_imports_opt + // }; +// } \ No newline at end of file diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index 0094f1c0..6eae65d0 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -1,13 +1,10 @@ -use std::{borrow::{Borrow, BorrowMut}, collections::HashMap}; -use crate::{graph::{file_imports::get_import_lines, graph_info::{generate_diff_graph, generate_diff_info, generate_full_graph}}, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; +use crate::{graph::{elements::MermaidGraphElements, graph_edges::graph_edges, graph_info::generate_diff_graph}, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; -use super::{elements::{MermaidEdge, MermaidEdges, MermaidNode, MermaidSubgraph}, function_line_range::generate_function_map, gitops::get_changed_files, graph_info::GraphInfo, utils::read_file}; +use super::{file_imports::get_import_lines, utils::all_code_files}; -pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Review) -> Option { - - // generate graph using AllFileFunctions, ImportLines and ImportPath +pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Review) -> Option { let flowchart_content_res = generate_flowchart_elements(diff_files, review).await; if flowchart_content_res.is_none() { log::error!("[generate_mermaid_flowchart] Unable to generate flowchart content, review: {}", review.id()); @@ -24,18 +21,31 @@ pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Rev async fn generate_flowchart_elements(diff_files: &Vec, review: &Review) -> Option { // generate full graph for base commit id git_checkout_commit(review, review.base_head_commit()); - let full_graph_opt = generate_full_graph(&review.clone_dir(), - &review.db_key(), &review.base_head_commit()).await; - if full_graph_opt.is_none() { + // let full_graph_opt = generate_full_graph(&review.clone_dir(), + // &review.db_key(), &review.base_head_commit()).await; + // if full_graph_opt.is_none() { + // log::error!( + // "[generate_flowchart_elements] Unable to generate full graph for review: {}", + // review.id()); + // return None; + // } + // let full_graph = full_graph_opt.expect("Empty full_graph_opt"); + // // generate diff graph for head commit id + let repo_code_files_opt = all_code_files(review.clone_dir()); + if repo_code_files_opt.is_none() { log::error!( - "[generate_flowchart_elements] Unable to generate full graph for review: {}", - review.id()); + "[generate_full_graph] Unable to get file paths: {}", review.clone_dir()); return None; } - let full_graph = full_graph_opt.expect("Empty full_graph_opt"); - // generate diff graph for head commit id + let repo_code_files = repo_code_files_opt.expect("Empty repo_code_files_opt"); + let all_file_import_info_opt = get_import_lines(&repo_code_files).await; + if all_file_import_info_opt.is_none() { + log::error!("[generate_graph_info] Unable to get import info for source files: {:#?}", &repo_code_files); + return None; + } + let all_file_import_info = all_file_import_info_opt.expect("Empty import_lines_opt"); git_checkout_commit(review, review.pr_head_commit()); - let (diff_graph_opt, deleted_files_opt) = generate_diff_graph(diff_files).await; + let diff_graph_opt = generate_diff_graph(diff_files, review).await; if diff_graph_opt.is_none() { log::error!( "[generate_flowchart_elements] Unable to generate diff graph for review: {}", @@ -43,220 +53,223 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review return None; } let diff_graph = diff_graph_opt.expect("Empty diff_graph_opt"); - let diff_info = generate_diff_info(&full_graph, &diff_graph); - - - let (file_lines_del_map, file_lines_add_map) = get_changed_files(diff_files, review); - let mut subgraph_map = HashMap::::new(); - let mut edges = MermaidEdges::new(Vec::::new()); - let files: Vec = diff_files.iter().map(|item| item.filepath.clone()).collect(); - for file in files.iter() { - if file_lines_add_map.contains_key(file) { - generate_mermaid_content( - &mut subgraph_map, - review, - file, - &file_lines_add_map, - &mut edges, - "green" - ).await; - } - } + // let diff_info = generate_diff_info(&full_graph, &diff_graph); + let mut graph_elems = MermaidGraphElements::new(); git_checkout_commit(review, review.base_head_commit()); - for file in files.iter() { - if file_lines_del_map.contains_key(file) { - generate_mermaid_content( - &mut subgraph_map, - review, - file, - &file_lines_del_map, - &mut edges, - "red" - ).await; - } - } - log::debug!("[generate_flowchart_elements] subgraph_map = {:#?}", &subgraph_map); + graph_edges(review, &all_file_import_info, &diff_graph, &mut graph_elems).await; + + // let (file_lines_del_map, file_lines_add_map) = get_changed_files(diff_files, review); + // let mut subgraph_map = HashMap::::new(); + // let mut edges = MermaidEdges::new(Vec::::new()); + // let files: Vec = diff_files.iter().map(|item| item.filepath.clone()).collect(); + // for file in files.iter() { + // if file_lines_add_map.contains_key(file) { + // generate_mermaid_content( + // &mut subgraph_map, + // review, + // file, + // &file_lines_add_map, + // &mut edges, + // "green" + // ).await; + // } + // } + // git_checkout_commit(review, review.base_head_commit()); + // for file in files.iter() { + // if file_lines_del_map.contains_key(file) { + // generate_mermaid_content( + // &mut subgraph_map, + // review, + // file, + // &file_lines_del_map, + // &mut edges, + // "red" + // ).await; + // } + // } + // log::debug!("[generate_flowchart_elements] subgraph_map = {:#?}", &subgraph_map); // Render content string - let subgraphs_str = subgraph_map.values().map( - |subgraph| subgraph.render_subgraph() - ).collect::>().join("\n"); - let edges_str = edges.render_edges(); - let content_str = format!("{}\n{}", &subgraphs_str, &edges_str); - return Some(content_str); + let elems_str = graph_elems.render_elements(); + // let subgraphs_str = subgraph_map.values().map( + // |subgraph| subgraph.render_subgraph() + // ).collect::>().join("\n"); + // let edges_str = edges.render_edges(); + // let content_str = format!("{}\n{}", &subgraphs_str, &edges_str); + return Some(elems_str); } -async fn generate_mermaid_content( - subgraph_map: &mut HashMap, review: &Review, file: &str, - file_lines_map: &HashMap>, - edges: &mut MermaidEdges, - color: &str -) { - if !file.ends_with(".rs") { - log::debug!("[mermaid_comment] File extension not valid: {}", &file); - return; - } - let file_path = format!("{}/{}", review.clone_dir(), &file); - let file_contents_res = read_file(&file_path); - if file_contents_res.is_none() { - log::error!( - "[generate_mermaid_content] Unable to read changed file content: {}", &file_path); - return; - } - let file_contents = file_contents_res.expect("Empty file_contents_res"); - let numbered_content = file_contents - .lines() - .enumerate() - .map(|(index, line)| format!("{} {}", index, line)) - .collect::>() - .join("\n"); - let flinemap_opt = extract_function_lines( - &numbered_content, - file - ).await; - if flinemap_opt.is_none() { - log::debug!( - "[generate_mermaid_content] Unable to generate function line map for file: {}", file); - return; - } - let flinemap = flinemap_opt.expect("Empty flinemap_opt"); - // deleted lines - let called_info_del_opt = generate_called_function_info( - file_lines_map, &numbered_content, file).await; - if called_info_del_opt.is_none() { - log::error!("[generate_mermaid_content] Unable to generate called functions info"); - return; - } - let (called_funcs_del, called_func_paths_del) = called_info_del_opt.expect("Empty called_info_opt"); - generate_callee_nodes(&called_func_paths_del, subgraph_map); - generate_caller_elements( - subgraph_map, - &file_lines_map[file], - &flinemap, - &called_funcs_del, - &called_func_paths_del, - edges, - &file, - color); - return; -} +// async fn generate_mermaid_content( +// subgraph_map: &mut HashMap, review: &Review, file: &str, +// file_lines_map: &HashMap>, +// edges: &mut MermaidEdges, +// color: &str +// ) { +// if !file.ends_with(".rs") { +// log::debug!("[mermaid_comment] File extension not valid: {}", &file); +// return; +// } +// let file_path = format!("{}/{}", review.clone_dir(), &file); +// let file_contents_res = read_file(&file_path); +// if file_contents_res.is_none() { +// log::error!( +// "[generate_mermaid_content] Unable to read changed file content: {}", &file_path); +// return; +// } +// let file_contents = file_contents_res.expect("Empty file_contents_res"); +// let numbered_content = file_contents +// .lines() +// .enumerate() +// .map(|(index, line)| format!("{} {}", index, line)) +// .collect::>() +// .join("\n"); +// let flinemap_opt = extract_function_lines( +// &numbered_content, +// file +// ).await; +// if flinemap_opt.is_none() { +// log::debug!( +// "[generate_mermaid_content] Unable to generate function line map for file: {}", file); +// return; +// } +// let flinemap = flinemap_opt.expect("Empty flinemap_opt"); +// // deleted lines +// let called_info_del_opt = generate_called_function_info( +// file_lines_map, &numbered_content, file).await; +// if called_info_del_opt.is_none() { +// log::error!("[generate_mermaid_content] Unable to generate called functions info"); +// return; +// } +// let (called_funcs_del, called_func_paths_del) = called_info_del_opt.expect("Empty called_info_opt"); +// generate_callee_nodes(&called_func_paths_del, subgraph_map); +// generate_caller_elements( +// subgraph_map, +// &file_lines_map[file], +// &flinemap, +// &called_funcs_del, +// &called_func_paths_del, +// edges, +// &file, +// color); +// return; +// } -fn generate_caller_elements(subgraph_map: &mut HashMap, - hunk_lines: &Vec<(usize, usize)>, - flinemap: &Vec, - called_funcs: &Vec, - called_funcs_path: &Vec, - edges: &mut MermaidEdges, - filename: &str, - color: &str) -{ - for cf in called_funcs { - let func_name_opt = get_func_from_line(cf.line, flinemap); - if func_name_opt.is_none() { - log::debug!("[generate_caller_elements] Unable to get func name for line: {:?}", cf.line); - continue; - } - let func_name = func_name_opt.expect("Empty func_name_opt"); - let caller_node; +// fn generate_caller_elements(subgraph_map: &mut HashMap, +// hunk_lines: &Vec<(usize, usize)>, +// flinemap: &Vec, +// called_funcs: &Vec, +// called_funcs_path: &Vec, +// edges: &mut MermaidEdges, +// filename: &str, +// color: &str) +// { +// for cf in called_funcs { +// let func_name_opt = get_func_from_line(cf.line, flinemap); +// if func_name_opt.is_none() { +// log::debug!("[generate_caller_elements] Unable to get func name for line: {:?}", cf.line); +// continue; +// } +// let func_name = func_name_opt.expect("Empty func_name_opt"); +// let caller_node; - // Borrow subgraph_map mutably to either retrieve or insert the subgraph - let subgraph = subgraph_map.entry(filename.to_string()).or_insert_with(|| { - MermaidSubgraph::new(filename.to_string(), HashMap::new()) - }); +// // Borrow subgraph_map mutably to either retrieve or insert the subgraph +// let subgraph = subgraph_map.entry(filename.to_string()).or_insert_with(|| { +// MermaidSubgraph::new(filename.to_string(), HashMap::new()) +// }); - // Borrow subgraph mutably to either retrieve or insert the node - if let Some(node) = subgraph.nodes().get(&func_name) { - caller_node = node.to_owned(); - } else { - caller_node = MermaidNode::new(func_name.clone()); - subgraph.add_node(caller_node.clone()); - } +// // Borrow subgraph mutably to either retrieve or insert the node +// if let Some(node) = subgraph.nodes().get(&func_name) { +// caller_node = node.to_owned(); +// } else { +// caller_node = MermaidNode::new(func_name.clone()); +// subgraph.add_node(caller_node.clone()); +// } - log::debug!("[generate_caller_elements] subgraph_map = {:#?}", subgraph_map); +// log::debug!("[generate_caller_elements] subgraph_map = {:#?}", subgraph_map); - for cfp in called_funcs_path { - if cf.name == cfp.function_name { - // Ensure we do not have an immutable borrow of subgraph_map while we borrow it immutably here - if let Some(import_subgraph) = subgraph_map.get(&cfp.import_path) { - if let Some(called_node) = import_subgraph.nodes().get(&cf.name) { - edges.add_edge(MermaidEdge::new( - cf.line, - caller_node.clone(), - called_node.to_owned(), - color.to_string() - )); - } - } - } - } - log::debug!("[generate_caller_elements] edges = {:#?}", &edges); - } -} +// for cfp in called_funcs_path { +// if cf.name == cfp.function_name { +// // Ensure we do not have an immutable borrow of subgraph_map while we borrow it immutably here +// if let Some(import_subgraph) = subgraph_map.get(&cfp.import_path) { +// if let Some(called_node) = import_subgraph.nodes().get(&cf.name) { +// edges.add_edge(MermaidEdge::new( +// cf.line, +// caller_node.clone(), +// called_node.to_owned(), +// color.to_string() +// )); +// } +// } +// } +// } +// log::debug!("[generate_caller_elements] edges = {:#?}", &edges); +// } +// } -fn get_func_from_line(line: usize, flinemaps: &[FunctionLineMap]) -> Option { - for flinemap in flinemaps { - log::debug!("[get_func_from_line] flinemap = {:#?}, line: {}", &flinemap, line); - log::debug!( - "[get_func_from_line] condition = {:?}", - (flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32)); - if flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32 { - log::debug!("[get_func_from_line] inside if"); - return Some(flinemap.name.to_string()); - } - } - return None; -} +// fn get_func_from_line(line: usize, flinemaps: &[FunctionLineMap]) -> Option { +// for flinemap in flinemaps { +// log::debug!("[get_func_from_line] flinemap = {:#?}, line: {}", &flinemap, line); +// log::debug!( +// "[get_func_from_line] condition = {:?}", +// (flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32)); +// if flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32 { +// log::debug!("[get_func_from_line] inside if"); +// return Some(flinemap.name.to_string()); +// } +// } +// return None; +// } -fn generate_callee_nodes( - called_funcs_path: &[CalledFunctionPath], - subgraph_map: &mut HashMap) -{ - for cfp in called_funcs_path { - if let Some(subgraph) = subgraph_map.get_mut(&cfp.import_path) { - subgraph.add_node( - MermaidNode::new(cfp.function_name.to_string()) - ); - } else { - // Create new subgraph - // Create new node - // Add to subgraph_map - let mut node_map = HashMap::::new(); - node_map.insert(cfp.function_name.to_string(), MermaidNode::new(cfp.function_name.to_string())); - let subgraph = MermaidSubgraph::new( - cfp.import_path.to_string(), - node_map - ); - subgraph_map.insert(cfp.import_path.to_string(), subgraph); - } - } - return; -} +// fn generate_callee_nodes( +// called_funcs_path: &[CalledFunctionPath], +// subgraph_map: &mut HashMap) +// { +// for cfp in called_funcs_path { +// if let Some(subgraph) = subgraph_map.get_mut(&cfp.import_path) { +// subgraph.add_node( +// MermaidNode::new(cfp.function_name.to_string()) +// ); +// } else { +// // Create new subgraph +// // Create new node +// // Add to subgraph_map +// let mut node_map = HashMap::::new(); +// node_map.insert(cfp.function_name.to_string(), MermaidNode::new(cfp.function_name.to_string())); +// let subgraph = MermaidSubgraph::new( +// cfp.import_path.to_string(), +// node_map +// ); +// subgraph_map.insert(cfp.import_path.to_string(), subgraph); +// } +// } +// return; +// } -async fn generate_called_function_info(file_lines_map: &HashMap>, - numbered_content: &str, filename: &str -) - -> Option<(Vec, Vec)> -{ - let del_lines = &file_lines_map[filename]; - let called_funcs_opt = extract_function_calls( - del_lines, - &numbered_content, - filename - ).await; - if called_funcs_opt.is_none() { - log::error!("[generate_called_function_info] Unable to get called functions for file: {}", filename); - return None; - } - let called_funcs = called_funcs_opt.expect("Empty called_funcs_opt"); - let called_func_paths_opt = extract_function_import_path( - &called_funcs, - &numbered_content, - filename - ).await; - if called_func_paths_opt.is_none() { - log::error!("[generate_called_function_info] Unable to get called function paths for file: {}", filename); - return None; - } - let called_func_paths = called_func_paths_opt.expect("Empty called_func_paths_opt"); - return Some((called_funcs, called_func_paths)); -} \ No newline at end of file +// async fn generate_called_function_info(file_lines_map: &HashMap>, +// numbered_content: &str, filename: &str +// ) +// -> Option<(Vec, Vec)> +// { +// let del_lines = &file_lines_map[filename]; +// let called_funcs_opt = extract_function_calls( +// del_lines, +// &numbered_content, +// filename +// ).await; +// if called_funcs_opt.is_none() { +// log::error!("[generate_called_function_info] Unable to get called functions for file: {}", filename); +// return None; +// } +// let called_funcs = called_funcs_opt.expect("Empty called_funcs_opt"); +// let called_func_paths_opt = extract_function_import_path( +// &called_funcs, +// &numbered_content, +// filename +// ).await; +// if called_func_paths_opt.is_none() { +// log::error!("[generate_called_function_info] Unable to get called function paths for file: {}", filename); +// return None; +// } +// let called_func_paths = called_func_paths_opt.expect("Empty called_func_paths_opt"); +// return Some((called_funcs, called_func_paths)); +// } \ No newline at end of file diff --git a/vibi-dpu/src/graph/mod.rs b/vibi-dpu/src/graph/mod.rs index 5df01be1..f36287bc 100644 --- a/vibi-dpu/src/graph/mod.rs +++ b/vibi-dpu/src/graph/mod.rs @@ -5,4 +5,5 @@ pub mod elements; pub mod function_line_range; pub mod file_imports; pub mod graph_info; -pub mod graph_edges; \ No newline at end of file +pub mod graph_edges; +pub mod function_call; \ No newline at end of file diff --git a/vibi-dpu/src/graph/utils.rs b/vibi-dpu/src/graph/utils.rs index a0e31e09..8302df21 100644 --- a/vibi-dpu/src/graph/utils.rs +++ b/vibi-dpu/src/graph/utils.rs @@ -3,6 +3,7 @@ use std::{collections::HashMap, path::{Path, PathBuf}, slice::Chunks}; use futures_util::StreamExt; use serde::{Deserialize, Serialize}; use serde_json::json; +use strsim::jaro_winkler; use walkdir::WalkDir; use std::fs; use rand::Rng; @@ -136,28 +137,19 @@ pub fn all_code_files(dir: &str) -> Option> { return Some(code_files); } -pub fn source_diff_files(diff_files: &Vec) -> (Option>, Option>) { - let mut code_files = Vec::::new(); - let mut deleted_files = Vec::::new(); +pub fn source_diff_files(diff_files: &Vec) -> Option> { + let mut code_files = Vec::::new(); for stat_item in diff_files { let filepath_str = &stat_item.filepath; - let filepath = Path::new(filepath_str); + let filepath = Path::new(filepath_str); if filepath.extension().and_then(|ext| ext.to_str()) == Some("rs") { - code_files.push(filepath.to_path_buf()); - } - if !filepath.exists() { - deleted_files.push(filepath.to_path_buf()); + code_files.push(stat_item.clone()); } } - let mut code_files_retval = None; - let mut deleted_files_retval = None; - if !code_files.is_empty() { - code_files_retval = Some(code_files); - } - if !deleted_files.is_empty() { - deleted_files_retval = Some(deleted_files); + if code_files.is_empty() { + return None; } - return (code_files_retval, deleted_files_retval); + return Some(code_files); } pub fn numbered_content(file_contents: String) -> Vec { @@ -167,4 +159,12 @@ pub fn numbered_content(file_contents: String) -> Vec { .map(|(index, line)| format!("{} {}", index+1, line)) .collect::>(); return lines; +} + +pub fn match_overlap(str1: &str, str2: &str, similarity_threshold: f64) -> bool { + let similarity = jaro_winkler(str1, str2); + if similarity >= similarity_threshold { + return true; + } + return false; } \ No newline at end of file From 0799afe8e5168ce1ae767acafdbcc547dbed82be Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Fri, 6 Sep 2024 05:10:41 +0530 Subject: [PATCH 23/43] fix edge storage and duplication --- vibi-dpu/src/graph/elements.rs | 244 ++++++++++++++++++++---------- vibi-dpu/src/graph/graph_edges.rs | 144 +++++------------- 2 files changed, 201 insertions(+), 187 deletions(-) diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs index 34d8e331..4fccda22 100644 --- a/vibi-dpu/src/graph/elements.rs +++ b/vibi-dpu/src/graph/elements.rs @@ -11,8 +11,9 @@ use super::utils::generate_random_string; #[derive(Debug, Default, Clone)] pub struct MermaidSubgraph { name: String, - nodes: HashMap>>, + nodes: HashMap, mermaid_id: String, + color: String } impl MermaidSubgraph { @@ -23,11 +24,12 @@ impl MermaidSubgraph { name, nodes: HashMap::new(), mermaid_id, + color: "".to_string() } } // Getter for nodes - pub fn nodes(&self) -> &HashMap>> { + pub fn nodes(&self) -> &HashMap { &self.nodes } @@ -35,37 +37,43 @@ impl MermaidSubgraph { &self.mermaid_id } + pub fn set_color(&mut self, color: &str) { + self.color = color.to_string(); + } + + pub fn name(&self) -> &String { + &self.name + } + // Setter for nodes - pub fn set_nodes(&mut self, nodes: HashMap>>) { + pub fn set_nodes(&mut self, nodes: HashMap) { self.nodes = nodes; } - pub fn add_node(&mut self, node: &Arc>) { - let node_owned = Arc::clone(node); - let function_name = { - let node_borrowed = node_owned.lock().unwrap(); - node_borrowed.function_name().to_string() - }; - if self.nodes.contains_key(&function_name) { + pub fn add_node(&mut self, node: MermaidNode) { + if self.nodes.contains_key(node.mermaid_id()) { log::error!( "[add_node] Node already exists: old - {:#?}, new - {:#?}", - &self.nodes[&function_name], + &self.nodes[node.mermaid_id()], node ); return; } - self.nodes.insert(function_name, node_owned); + self.nodes.insert(node.mermaid_id().to_string(), node.to_owned()); } - pub fn get_node(&self, func_name: &str) -> Option<&Arc>> { + pub fn get_node(&self, func_name: &str) -> Option<&MermaidNode> { self.nodes.get(func_name) } + pub fn get_mut_node(&mut self, func_name: &str) -> Option<&mut MermaidNode> { + self.nodes.get_mut(func_name) + } + pub fn render_subgraph(&self) -> String { let mut all_nodes = Vec::new(); for (_, node) in self.nodes() { - let node_borrowed = node.lock().unwrap(); - all_nodes.push(node_borrowed.render_node()); + all_nodes.push(node.render_node()); } let subgraph_str = format!( "\tsubgraph {} [{}]\n{}\nend\n", @@ -81,21 +89,24 @@ impl MermaidSubgraph { pub struct MermaidNode { function_name: String, mermaid_id: String, + parent_id: String, + color: String } impl MermaidNode { // Constructor - pub fn new(function_name: String) -> Self { + pub fn new(function_name: String, parent_id: String) -> Self { let mermaid_id = generate_random_string(4); Self { mermaid_id, function_name, + parent_id, + color: "".to_string() } } - // Getter for function_name - pub fn function_name(&self) -> &String { - &self.function_name + pub fn color(&self) -> &String { + &self.color } // Getter for mermaid_id @@ -103,12 +114,28 @@ impl MermaidNode { &self.mermaid_id } + pub fn parent_id(&self) -> &String { + &self.parent_id + } + + pub fn set_color(&mut self, color: &str) { + self.color = color.to_string() + } + // Setter for function_name pub fn set_function_name(&mut self, function_name: String) { self.function_name = function_name; } + pub fn compare_and_change_color(&mut self, node_color: &str) { + if (self.color() == "red" && node_color == "green") || + (self.color() == "green" && node_color == "red") { + self.set_color("yellow"); + } + } + pub fn render_node(&self) -> String { + // TODO FIXME - get line num or funcdef obj let node_str = format!("\t{}[{}]", &self.mermaid_id, &self.function_name); node_str } @@ -117,23 +144,29 @@ impl MermaidNode { #[derive(Debug, Default, Clone)] pub struct MermaidEdge { line: usize, - caller_function: Arc>, - called_function: Arc>, - color: String, + src_func_key: String, + src_subgraph_key: String, + dest_func_key: String, + dest_subgraph_key: String, + color: String } impl MermaidEdge { // Constructor pub fn new( line: usize, - caller_function: &Arc>, - called_function: &Arc>, + src_func_key: String, + src_subgraph_key: String, + dest_func_key: String, + dest_subgraph_key: String, color: String, ) -> Self { Self { line, - caller_function: Arc::clone(caller_function), - called_function: Arc::clone(called_function), + src_func_key, + src_subgraph_key, + dest_func_key, + dest_subgraph_key, color, } } @@ -148,25 +181,76 @@ impl MermaidEdge { &self.color } + // Getter for src_func_key + pub fn src_func_key(&self) -> &String { + &self.src_func_key + } + + // Getter for src_subgraph_key + pub fn src_subgraph_key(&self) -> &String { + &self.src_subgraph_key + } + + // Getter for dest_func_key + pub fn dest_func_key(&self) -> &String { + &self.dest_func_key + } + + // Getter for dest_subgraph_key + pub fn dest_subgraph_key(&self) -> &String { + &self.dest_subgraph_key + } + // Setter for color - pub fn set_color(&mut self, color: String) { - self.color = color; + pub fn set_color(&mut self, color: &str) { + self.color = color.to_string(); + } + + pub fn compare_and_set_color(&mut self, edge_color: &str) { + if (self.color() == "green" && edge_color == "red") || + (self.color() == "red" && edge_color == "green") { + self.set_color("yellow"); + } } pub fn add_edge_and_nodes(&mut self) { // add edge and source and destination nodes } - pub fn render_edge_definition(&self) -> String { - let (caller_str, called_str) = { - let caller_borrowed = self.caller_function.lock().unwrap(); - let called_borrowed = self.called_function.lock().unwrap(); - ( - caller_borrowed.function_name().to_string(), - called_borrowed.function_name().to_string(), - ) - }; - let edge_str = format!("\t{} -- Line {} --> {}\n", caller_str, self.line, called_str); + pub fn get_edge_key(&self) -> String { + let edge_key = format!( + "{}/{}/{}/{}/{}", self.src_subgraph_key(), self.src_func_key(), + self.line(), + self.dest_subgraph_key(), self.dest_func_key() + ); + return edge_key; + } + + pub fn render_edge_definition(&self, subgraph_map: &HashMap) -> String { + let src_subgraph_opt = subgraph_map.get(&self.src_subgraph_key); + if src_subgraph_opt.is_none() { + log::debug!("[render_edge_definition] Unable to get subgraph: {}", &self.src_subgraph_key); + return "".to_string(); + } + let src_node_opt = src_subgraph_opt.expect("Empty src_subgraph_opt").nodes().get(&self.src_func_key); + if src_node_opt.is_none() { + log::debug!("[render_edge_definition] Unable to get node: {} in subgraph: {}", &self.src_func_key, &self.src_subgraph_key); + return "".to_string(); + } + let src_node = src_node_opt.expect("Empty src_node_opt"); + + let dest_subgraph_opt = subgraph_map.get(&self.dest_subgraph_key); + if dest_subgraph_opt.is_none() { + log::debug!("[render_edge_definition] Unable to get subgraph: {}", &self.dest_subgraph_key); + return "".to_string(); + } + let dest_node_opt = dest_subgraph_opt.expect("Empty src_subgraph_opt").nodes().get(&self.dest_func_key); + if dest_node_opt.is_none() { + log::debug!("[render_edge_definition] Unable to get node: {} in subgraph: {}", &self.dest_func_key, &self.dest_subgraph_key); + return "".to_string(); + } + let dest_node = dest_node_opt.expect("Empty src_node_opt"); + let edge_str = format!("\t{} -- Line {} --> {}\n", src_node.mermaid_id(), self.line, dest_node.mermaid_id()); edge_str } @@ -178,81 +262,81 @@ impl MermaidEdge { #[derive(Debug, Default, Clone)] pub struct MermaidGraphElements { - edges: Vec, + edges: HashMap, subgraphs: HashMap, } impl MermaidGraphElements { pub fn new() -> Self { Self { - edges: Vec::new(), + edges: HashMap::new(), subgraphs: HashMap::new(), } } - pub fn subgraph_for_file(&self, file: &str) -> Option<&MermaidSubgraph> { - self.subgraphs.get(file) - } - pub fn add_edge( &mut self, edge_color: &str, - line: usize, + calling_line_num: usize, source_func_name: &str, dest_func_name: &str, source_file: &str, dest_file: &str, - ) { - let source_node: Arc>; - let dest_node: Arc>; + source_color: &str, + dest_color: &str + ) { + self.create_node(source_file, source_func_name, source_color); + self.create_node(dest_file, dest_func_name, dest_color); + let edge = MermaidEdge::new( + calling_line_num, + source_func_name.to_string(), + source_file.to_string(), + dest_func_name.to_string(), + dest_file.to_string(), + edge_color.to_string()); + self.add_edge_to_edges(edge); + } - if let Some(subgraph) = self.subgraphs.get_mut(source_file) { - if let Some(node) = subgraph.get_node(source_func_name) { - source_node = Arc::clone(node); + fn create_node(&mut self, subgraph_key: &str, node_func_name: &str, node_color: &str) { + if let Some(subgraph) = self.subgraphs.get_mut(subgraph_key) { + if let Some(node) = subgraph.get_mut_node(node_func_name) { + node.compare_and_change_color(node_color); } else { - let node = MermaidNode::new(source_func_name.to_string()); - source_node = Arc::new(Mutex::new(node)); - subgraph.add_node(&source_node); + let mut node = MermaidNode::new(node_func_name.to_string(), + subgraph.mermaid_id().to_string()); + node.set_color(node_color); + subgraph.add_node(node); } } else { - let node = MermaidNode::new(source_func_name.to_string()); - source_node = Arc::new(Mutex::new(node)); - let mut subgraph = MermaidSubgraph::new(source_file.to_string()); - subgraph.add_node(&source_node); + let mut subgraph = MermaidSubgraph::new(subgraph_key.to_string()); + let mut node = MermaidNode::new(node_func_name.to_string(), + subgraph.mermaid_id().to_string()); + node.set_color(node_color); + subgraph.add_node(node); self.add_subgraph(subgraph); } + } - if let Some(subgraph) = self.subgraphs.get_mut(dest_file) { - if let Some(node) = subgraph.get_node(dest_func_name) { - dest_node = Arc::clone(node); - } else { - let node = MermaidNode::new(dest_func_name.to_string()); - dest_node = Arc::new(Mutex::new(node)); - subgraph.add_node(&dest_node); - } - } else { - let node = MermaidNode::new(dest_func_name.to_string()); - dest_node = Arc::new(Mutex::new(node)); - let mut subgraph = MermaidSubgraph::new(dest_file.to_string()); - subgraph.add_node(&dest_node); - self.add_subgraph(subgraph); + fn add_subgraph(&mut self, subgraph: MermaidSubgraph) { + if !self.subgraphs.contains_key(subgraph.name()) { + self.subgraphs.insert(subgraph.name().to_string(), subgraph); } - - let edge = MermaidEdge::new(line, &source_node, &dest_node, edge_color.to_string()); - self.edges.push(edge); } - fn add_subgraph(&mut self, subgraph: MermaidSubgraph) { - if !self.subgraphs.contains_key(subgraph.mermaid_id()) { - self.subgraphs.insert(subgraph.mermaid_id().to_string(), subgraph); + fn add_edge_to_edges(&mut self, edge: MermaidEdge) { + let edge_key = edge.get_edge_key(); + if let Some(edge_mut) = self.edges.get_mut(&edge_key) { + edge_mut.compare_and_set_color(edge.color()); + return; } + self.edges.insert(edge_key, edge); } fn render_edges(&self) -> String { let mut all_edges = Vec::::new(); let mut all_edges_style = Vec::::new(); - for (idx, edge) in self.edges.iter().enumerate() { - all_edges.push(edge.render_edge_definition()); + for (idx, (_, edge)) in self.edges.iter().enumerate() { + all_edges.push(edge.render_edge_definition(&self.subgraphs)); all_edges_style.push(format!("\tlinkStyle {} {}", idx, edge.render_edge_style())); } let all_edges_str = format!("{}{}", all_edges.join("\n"), all_edges_style.join("\n")); diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index 8e1387dd..584cd860 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -4,8 +4,8 @@ use crate::utils::{gitops::git_checkout_commit, review::Review}; use super::{elements::MermaidGraphElements, file_imports::{AllFileImportInfo, ImportPath}, function_call::function_calls_in_file, function_line_range::{generate_function_map, FuncDefInfo, FunctionFileMap}, graph_info::DiffGraph, utils::match_overlap}; pub async fn graph_edges(review: &Review, all_import_info: &AllFileImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + outgoing_edges(diff_graph, graph_elems).await; incoming_edges(review, all_import_info, diff_graph, graph_elems).await; - outgoing_edges(all_import_info, diff_graph, graph_elems).await; } async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { @@ -26,12 +26,15 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); for (line_num, source_func_def) in source_func_defs { if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("green", + graph_elems.add_edge("", line_num.to_owned(), &source_func_def.name(), &dest_func.name(), &source_filename, - dest_filename); + dest_filename, + "", + "green" + ); } } } @@ -53,12 +56,15 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); for (line_num, source_func_def) in source_func_defs { if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("green", + graph_elems.add_edge("", line_num.to_owned(), &source_func_def.name(), &dest_func.name(), &source_filename, - dest_filename); + dest_filename, + "", + "green" + ); } } } @@ -82,12 +88,15 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); for (line_num, source_func_def) in source_func_defs { if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("red", + graph_elems.add_edge("", line_num.to_owned(), &source_func_def.name(), &dest_func.name(), &source_filename, - dest_filename); + dest_filename, + "", + "red" + ); } } } @@ -113,7 +122,10 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di &source_func_def.name(), &dest_func.name(), &source_filename, - dest_filename); + dest_filename, + "", + "red" + ); } } } @@ -124,32 +136,6 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di } } -// async fn generate_incoming_edges(modified_funcs: &HashMap>, full_graph: &GraphInfo, diff_graph: &GraphInfo, color: &str, graph_elems: &mut MermaidGraphElements) { -// for (dest_filename, dest_func_info_vec) in modified_funcs.iter() { -// for dest_func_info in dest_func_info_vec { -// search_imports_in_graph(&dest_filename, dest_func_info, -// full_graph, color, graph_elems).await; -// search_imports_in_graph(&dest_filename, dest_func_info, -// diff_graph, color, graph_elems).await; -// } -// } -// } - -// async fn search_imports_in_graph(dest_filename: &str, dest_func_info: &FuncDefInfo, search_graph: &GraphInfo, color: &str, graph_elems: &mut MermaidGraphElements) { -// for source_filename in search_graph.import_info().files() { -// if let Some(source_file_imports) = search_graph.import_info().file_import_info(source_filename) { -// let file_imports = source_file_imports.all_import_paths(); -// for import_obj in file_imports { -// if match_import_condition(dest_filename, &import_obj, dest_func_info) { -// if let Some(source_func_file_map) = search_graph.function_info().functions_in_file(source_filename) { -// add_edge_for_file(source_filename, source_func_file_map, dest_filename, dest_func_info, color, graph_elems).await; -// } -// } -// } -// } -// } -// } - fn match_import_condition(dest_filename: &str, import_obj: &ImportPath, dest_func_info: &FuncDefInfo) -> bool { match_overlap( &dest_filename, @@ -160,34 +146,11 @@ fn match_import_condition(dest_filename: &str, import_obj: &ImportPath, dest_fun 0.5) } -async fn add_edge_for_file(source_filename: &str, source_func_def: &FuncDefInfo, dest_filename: &str, dest_func_info: &FuncDefInfo, color: &str, graph_elems: &mut MermaidGraphElements) { - // TODO FIXME - do git commit checkout - let filepath = Path::new(source_filename); - let file_pathbuf = filepath.to_path_buf(); - if let Some(func_call_chunk) = - function_calls_in_file(&file_pathbuf, &dest_func_info.name()).await - { - for source_chunk_call in func_call_chunk { - for source_func_line in source_chunk_call.function_calls() { - if source_func_def != dest_func_info { - graph_elems.add_edge(color, - source_func_line.to_owned(), - &source_func_def.name(), - &dest_func_info.name(), - &source_filename, - dest_filename); - } - } - } - } -} - -async fn outgoing_edges(all_import_info: &AllFileImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { +async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { // TODO - git checkout for (source_filename, func_calls) in diff_graph.diff_func_calls() { for source_func_call in func_calls.added_calls() { let dest_filename = source_func_call.import_info().import_path(); - let func_name = source_func_call.import_info().imported(); let lines = source_func_call.call_info().iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); // send this file for getting func defs // search in diff graph @@ -204,7 +167,10 @@ async fn outgoing_edges(all_import_info: &AllFileImportInfo, diff_graph: &DiffGr source_func_def.name(), dest_func_def.name(), source_filename, - dest_filename); + dest_filename, + "green", + "" + ); } } } @@ -224,7 +190,10 @@ async fn outgoing_edges(all_import_info: &AllFileImportInfo, diff_graph: &DiffGr source_func_def.name(), dest_func_def.name(), source_filename, - dest_filename); + dest_filename, + "green", + "" + ); } } } @@ -234,7 +203,6 @@ async fn outgoing_edges(all_import_info: &AllFileImportInfo, diff_graph: &DiffGr // do same for deleted_calls for source_func_call in func_calls.deleted_calls() { let dest_filename = source_func_call.import_info().import_path(); - let func_name = source_func_call.import_info().imported(); let diff_file_funcdefs = diff_graph.all_file_func_defs(); let lines = source_func_call.call_info().iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); // identify this particular func @@ -249,10 +217,11 @@ async fn outgoing_edges(all_import_info: &AllFileImportInfo, diff_graph: &DiffGr source_func_def.name(), dest_func_def.name(), source_filename, - dest_filename); + dest_filename, + "red", + "" + ); } - // add_edge_for_file(source_filename, _, - // dest_filename, dest_func_def, "red", graph_elems).await; } } } @@ -271,54 +240,15 @@ async fn outgoing_edges(all_import_info: &AllFileImportInfo, diff_graph: &DiffGr source_func_def.name(), dest_func_def.name(), source_filename, - dest_filename); + dest_filename, + "red", + "" + ); } - // add_edge_for_file(source_filename, _, - // dest_filename, dest_func_def, "red", graph_elems).await; } } } } } } -} - -// async fn generate_outgoing_edges(modified_imports: &HashMap>, full_graph: &GraphInfo, diff_graph: &GraphInfo, color: &str, graph_elems: &mut MermaidGraphElements) { -// for (dest_filename, dest_import_info) in modified_imports.iter() { -// let filepath = Path::new(dest_filename); -// let file_pathbuf = filepath.to_path_buf(); -// for dest_import in dest_import_info { -// search_funcs_in_graph(full_graph, dest_import, &file_pathbuf, color, dest_filename, graph_elems).await; -// // TODO FIXME - think about similar edges being searched from both full and diff graph. How to avoid adding them repeatedly? -// search_funcs_in_graph(diff_graph, dest_import, &file_pathbuf, color, dest_filename, graph_elems).await; -// } -// } -// } - -// async fn search_funcs_in_graph(search_graph: &GraphInfo, dest_import: &ImportPath, file_pathbuf: &PathBuf, color: &str, dest_file: &str, graph_elems: &mut MermaidGraphElements) { -// for source_file in search_graph.function_info().all_files() { -// if match_overlap(&source_file, &dest_import.imported(), 0.5) { -// if let Some(source_file_func_calls) = -// function_calls_in_file(&file_pathbuf, &dest_import.imported()).await -// { -// if let Some(func_file_map) = -// search_graph.function_info().functions_in_file(source_file) -// { -// for func_call_chunk in source_file_func_calls { -// for source_file_line in func_call_chunk.function_calls() { -// if let Some(source_func_def) = func_file_map.func_at_line(source_file_line.to_owned()) { -// if source_func_def.name() != dest_import.imported() { -// graph_elems.add_edge(color, source_file_line.to_owned(), &source_func_def.name(), &dest_import.imported(), source_file, dest_file) -// } -// } -// } -// } -// } -// } -// } -// } -// } - -async fn edge_nodes() { - // render all edges and their nodes } \ No newline at end of file From 3bc8c645e5e9fadb74298681d1f46e00eee2410d Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Fri, 6 Sep 2024 06:39:31 +0530 Subject: [PATCH 24/43] add node styling and href --- vibi-dpu/src/graph/elements.rs | 84 ++++++++++++++++++++------ vibi-dpu/src/graph/graph_edges.rs | 32 +++++++--- vibi-dpu/src/graph/mermaid_elements.rs | 2 +- 3 files changed, 92 insertions(+), 26 deletions(-) diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs index 4fccda22..d01a1809 100644 --- a/vibi-dpu/src/graph/elements.rs +++ b/vibi-dpu/src/graph/elements.rs @@ -6,6 +6,8 @@ use std::{ use serde::{Serialize, Deserialize}; // TODO, FIXME - remove all unwraps +use crate::utils::review::Review; + use super::utils::generate_random_string; #[derive(Debug, Default, Clone)] @@ -70,10 +72,10 @@ impl MermaidSubgraph { self.nodes.get_mut(func_name) } - pub fn render_subgraph(&self) -> String { + pub fn render_subgraph(&self, review: &Review, subgraph_map: &HashMap) -> String { let mut all_nodes = Vec::new(); for (_, node) in self.nodes() { - all_nodes.push(node.render_node()); + all_nodes.push(node.render_node(review, subgraph_map)); } let subgraph_str = format!( "\tsubgraph {} [{}]\n{}\nend\n", @@ -90,18 +92,20 @@ pub struct MermaidNode { function_name: String, mermaid_id: String, parent_id: String, - color: String + color: String, + def_line: usize } impl MermaidNode { // Constructor - pub fn new(function_name: String, parent_id: String) -> Self { + pub fn new(function_name: String, parent_id: String, def_line: usize) -> Self { let mermaid_id = generate_random_string(4); Self { mermaid_id, function_name, parent_id, - color: "".to_string() + color: "".to_string(), + def_line } } @@ -134,10 +138,54 @@ impl MermaidNode { } } - pub fn render_node(&self) -> String { + pub fn render_node(&self, review: &Review, subgraph_map: &HashMap) -> String { // TODO FIXME - get line num or funcdef obj + let url_str = format!("click {} href \"{}\" _blank", + self.mermaid_id(), self.get_node_str(review, subgraph_map)); + let class_str = self.get_style_class(); let node_str = format!("\t{}[{}]", &self.mermaid_id, &self.function_name); - node_str + return format!("{}\n{}\n{}", &node_str, &class_str, &url_str); + } + + fn get_node_str(&self, review: &Review, subgraph_map: &HashMap) -> String { + if let Some(subgraph) = subgraph_map.get(self.parent_id()) { + let file_hash = sha256::digest(subgraph.name()); + let mut diff_side_str = ""; + if self.color != "" { + if self.color == "green" || self.color == "yellow" { + diff_side_str = "R"; + } else if self.color == "red" { + diff_side_str = "L"; + } + return format!("https://github.com/{}/{}/pull/{}/files#diff-{}{}{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + diff_side_str, + self.def_line + ); + } else { + return format!("https://github.com/{}/{}/blob/{}/{}#L{}", + review.repo_owner(), + review.repo_name(), + review.base_head_commit(), + subgraph.name(), + self.def_line + ); + } + } + return "".to_string(); + } + + fn get_style_class(&self) -> String { + let class_str_prefix = format!("class {}", self.mermaid_id()); + match self.color.as_str() { + "green" => format!("{} added", &class_str_prefix), + "red" => format!("{} deleted", &class_str_prefix), + "yellow" => format!("{} modified", &class_str_prefix), + _ => "".to_string() + } } } @@ -283,10 +331,12 @@ impl MermaidGraphElements { source_file: &str, dest_file: &str, source_color: &str, - dest_color: &str + dest_color: &str, + source_def_line: usize, + dest_def_line: usize ) { - self.create_node(source_file, source_func_name, source_color); - self.create_node(dest_file, dest_func_name, dest_color); + self.create_node(source_file, source_func_name, source_color, source_def_line); + self.create_node(dest_file, dest_func_name, dest_color, dest_def_line); let edge = MermaidEdge::new( calling_line_num, source_func_name.to_string(), @@ -297,20 +347,20 @@ impl MermaidGraphElements { self.add_edge_to_edges(edge); } - fn create_node(&mut self, subgraph_key: &str, node_func_name: &str, node_color: &str) { + fn create_node(&mut self, subgraph_key: &str, node_func_name: &str, node_color: &str, def_line: usize) { if let Some(subgraph) = self.subgraphs.get_mut(subgraph_key) { if let Some(node) = subgraph.get_mut_node(node_func_name) { node.compare_and_change_color(node_color); } else { let mut node = MermaidNode::new(node_func_name.to_string(), - subgraph.mermaid_id().to_string()); + subgraph.name().to_string(), def_line); node.set_color(node_color); subgraph.add_node(node); } } else { let mut subgraph = MermaidSubgraph::new(subgraph_key.to_string()); let mut node = MermaidNode::new(node_func_name.to_string(), - subgraph.mermaid_id().to_string()); + subgraph.name().to_string(), def_line); node.set_color(node_color); subgraph.add_node(node); self.add_subgraph(subgraph); @@ -343,16 +393,16 @@ impl MermaidGraphElements { all_edges_str } - fn render_subgraphs(&self) -> String { + fn render_subgraphs(&self, review: &Review) -> String { self.subgraphs .values() - .map(|subgraph| subgraph.render_subgraph()) + .map(|subgraph| subgraph.render_subgraph(review, &self.subgraphs)) .collect::>() .join("\n") } - pub fn render_elements(&self) -> String { - let all_elements_str = format!("{}\n{}", &self.render_subgraphs(), &self.render_edges()); + pub fn render_elements(&self, review: &Review) -> String { + let all_elements_str = format!("{}\n{}", &self.render_subgraphs(review), &self.render_edges()); all_elements_str } } diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index 584cd860..bd6daaed 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -33,7 +33,9 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di &source_filename, dest_filename, "", - "green" + "green", + source_func_def.line_start(), + dest_func.line_start() ); } } @@ -63,7 +65,9 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di &source_filename, dest_filename, "", - "green" + "green", + source_func_def.line_start(), + dest_func.line_start() ); } } @@ -95,7 +99,9 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di &source_filename, dest_filename, "", - "red" + "red", + source_func_def.line_start(), + dest_func.line_start() ); } } @@ -124,7 +130,9 @@ async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, di &source_filename, dest_filename, "", - "red" + "red", + source_func_def.line_start(), + dest_func.line_start() ); } } @@ -169,7 +177,9 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl source_filename, dest_filename, "green", - "" + "", + source_func_def.line_start(), + dest_func_def.line_start() ); } } @@ -192,7 +202,9 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl source_filename, dest_filename, "green", - "" + "", + source_func_def.line_start(), + dest_func_def.line_start() ); } } @@ -219,7 +231,9 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl source_filename, dest_filename, "red", - "" + "", + source_func_def.line_start(), + dest_func_def.line_start() ); } } @@ -242,7 +256,9 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl source_filename, dest_filename, "red", - "" + "", + source_func_def.line_start(), + dest_func_def.line_start() ); } } diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index 6eae65d0..55e107ee 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -89,7 +89,7 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review // } // log::debug!("[generate_flowchart_elements] subgraph_map = {:#?}", &subgraph_map); // Render content string - let elems_str = graph_elems.render_elements(); + let elems_str = graph_elems.render_elements(review); // let subgraphs_str = subgraph_map.values().map( // |subgraph| subgraph.render_subgraph() // ).collect::>().join("\n"); From c47e53256946811ec32100120ddf1514542ba9ae Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Fri, 6 Sep 2024 07:36:39 +0530 Subject: [PATCH 25/43] fix node rendering and implement subgraph rendering --- vibi-dpu/src/graph/elements.rs | 125 +++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 36 deletions(-) diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs index d01a1809..99aeffd5 100644 --- a/vibi-dpu/src/graph/elements.rs +++ b/vibi-dpu/src/graph/elements.rs @@ -78,13 +78,53 @@ impl MermaidSubgraph { all_nodes.push(node.render_node(review, subgraph_map)); } let subgraph_str = format!( - "\tsubgraph {} [{}]\n{}\nend\n", + "\tsubgraph {} [{}]\n{}\n\tend\n{}\n", self.mermaid_id, self.name, - all_nodes.join("\n") + all_nodes.join("\n"), + self.render_subgraph_style() ); subgraph_str } + + fn render_subgraph_style(&self) -> String { + let mut class_str = ""; + for (_, node) in self.nodes() { + match node.color().as_str() { + "yellow" => { + class_str = "modified"; + break; + }, + "red" => { + match class_str { + "green" | "yellow" => { + class_str = "modified"; + break; + }, + "" | "red" | _ => { + class_str = "red"; + } + } + }, + "green" => { + match class_str { + "red" | "yellow" => { + class_str = "modified"; + break; + }, + "" | "green" | _ => { + class_str = "green"; + } + } + } + "" | _ => () + } + } + if class_str != "" { + return format!("\tclass {} {}", self.mermaid_id(), class_str); + } + return "".to_string(); + } } #[derive(Debug, Serialize, Default, Deserialize, Clone)] @@ -126,11 +166,6 @@ impl MermaidNode { self.color = color.to_string() } - // Setter for function_name - pub fn set_function_name(&mut self, function_name: String) { - self.function_name = function_name; - } - pub fn compare_and_change_color(&mut self, node_color: &str) { if (self.color() == "red" && node_color == "green") || (self.color() == "green" && node_color == "red") { @@ -139,8 +174,7 @@ impl MermaidNode { } pub fn render_node(&self, review: &Review, subgraph_map: &HashMap) -> String { - // TODO FIXME - get line num or funcdef obj - let url_str = format!("click {} href \"{}\" _blank", + let url_str = format!("\tclick {} href \"{}\" _blank", self.mermaid_id(), self.get_node_str(review, subgraph_map)); let class_str = self.get_style_class(); let node_str = format!("\t{}[{}]", &self.mermaid_id, &self.function_name); @@ -150,30 +184,40 @@ impl MermaidNode { fn get_node_str(&self, review: &Review, subgraph_map: &HashMap) -> String { if let Some(subgraph) = subgraph_map.get(self.parent_id()) { let file_hash = sha256::digest(subgraph.name()); - let mut diff_side_str = ""; - if self.color != "" { - if self.color == "green" || self.color == "yellow" { - diff_side_str = "R"; - } else if self.color == "red" { - diff_side_str = "L"; + return match self.color.as_str() { + "green" | "yellow" => { + let diff_side_str = "R"; + format!( + "https://github.com/{}/{}/pull/{}/files#diff-{}{}{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + diff_side_str, + self.def_line + ) } - return format!("https://github.com/{}/{}/pull/{}/files#diff-{}{}{}", - review.repo_owner(), - review.repo_name(), - review.id(), - &file_hash, - diff_side_str, - self.def_line - ); - } else { - return format!("https://github.com/{}/{}/blob/{}/{}#L{}", + "red" => { + let diff_side_str = "L"; + format!( + "https://github.com/{}/{}/pull/{}/files#diff-{}{}{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + diff_side_str, + self.def_line + ) + } + "" | _ => format!( + "https://github.com/{}/{}/blob/{}/{}#L{}", review.repo_owner(), review.repo_name(), review.base_head_commit(), subgraph.name(), self.def_line - ); - } + ), + }; } return "".to_string(); } @@ -181,10 +225,10 @@ impl MermaidNode { fn get_style_class(&self) -> String { let class_str_prefix = format!("class {}", self.mermaid_id()); match self.color.as_str() { - "green" => format!("{} added", &class_str_prefix), - "red" => format!("{} deleted", &class_str_prefix), - "yellow" => format!("{} modified", &class_str_prefix), - _ => "".to_string() + "green" => format!("\t{} added", &class_str_prefix), + "red" => format!("\t{} deleted", &class_str_prefix), + "yellow" => format!("\t{} modified", &class_str_prefix), + "" | _ => "".to_string() } } } @@ -394,11 +438,20 @@ impl MermaidGraphElements { } fn render_subgraphs(&self, review: &Review) -> String { - self.subgraphs - .values() - .map(|subgraph| subgraph.render_subgraph(review, &self.subgraphs)) - .collect::>() - .join("\n") + format!("{}\n{}", + self.subgraphs + .values() + .map(|subgraph| subgraph.render_subgraph(review, &self.subgraphs)) + .collect::>() + .join("\n"), + self.subgraph_style_defs()) + } + + fn subgraph_style_defs(&self) -> String { + let modified_class_def = "\tclassDef modified stroke:black,fill:yellow"; + let added_class_def = "\tclassDef added stroke:black,fill:#b7e892,color:black"; + let deleted_class_def = "\tclassDef deleted stroke:black,fill:red"; + format!("{}\n{}\n{}", modified_class_def, added_class_def, deleted_class_def) } pub fn render_elements(&self, review: &Review) -> String { From 0eeff01e1d1add324de4b663d991ef60c1d5c0a9 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Fri, 6 Sep 2024 08:38:44 +0530 Subject: [PATCH 26/43] implement edge rendering and flowchart config fix --- vibi-dpu/src/graph/elements.rs | 110 ++++++++++++++----------- vibi-dpu/src/graph/mermaid_elements.rs | 11 ++- 2 files changed, 74 insertions(+), 47 deletions(-) diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs index 99aeffd5..ec9ad5b2 100644 --- a/vibi-dpu/src/graph/elements.rs +++ b/vibi-dpu/src/graph/elements.rs @@ -53,15 +53,15 @@ impl MermaidSubgraph { } pub fn add_node(&mut self, node: MermaidNode) { - if self.nodes.contains_key(node.mermaid_id()) { + if self.nodes.contains_key(node.function_name()) { log::error!( "[add_node] Node already exists: old - {:#?}, new - {:#?}", - &self.nodes[node.mermaid_id()], + &self.nodes[node.function_name()], node ); return; } - self.nodes.insert(node.mermaid_id().to_string(), node.to_owned()); + self.nodes.insert(node.function_name().to_string(), node.to_owned()); } pub fn get_node(&self, func_name: &str) -> Option<&MermaidNode> { @@ -153,6 +153,10 @@ impl MermaidNode { &self.color } + pub fn function_name(&self) -> &String { + &self.function_name + } + // Getter for mermaid_id pub fn mermaid_id(&self) -> &String { &self.mermaid_id @@ -317,39 +321,6 @@ impl MermaidEdge { ); return edge_key; } - - pub fn render_edge_definition(&self, subgraph_map: &HashMap) -> String { - let src_subgraph_opt = subgraph_map.get(&self.src_subgraph_key); - if src_subgraph_opt.is_none() { - log::debug!("[render_edge_definition] Unable to get subgraph: {}", &self.src_subgraph_key); - return "".to_string(); - } - let src_node_opt = src_subgraph_opt.expect("Empty src_subgraph_opt").nodes().get(&self.src_func_key); - if src_node_opt.is_none() { - log::debug!("[render_edge_definition] Unable to get node: {} in subgraph: {}", &self.src_func_key, &self.src_subgraph_key); - return "".to_string(); - } - let src_node = src_node_opt.expect("Empty src_node_opt"); - - let dest_subgraph_opt = subgraph_map.get(&self.dest_subgraph_key); - if dest_subgraph_opt.is_none() { - log::debug!("[render_edge_definition] Unable to get subgraph: {}", &self.dest_subgraph_key); - return "".to_string(); - } - let dest_node_opt = dest_subgraph_opt.expect("Empty src_subgraph_opt").nodes().get(&self.dest_func_key); - if dest_node_opt.is_none() { - log::debug!("[render_edge_definition] Unable to get node: {} in subgraph: {}", &self.dest_func_key, &self.dest_subgraph_key); - return "".to_string(); - } - let dest_node = dest_node_opt.expect("Empty src_node_opt"); - let edge_str = format!("\t{} -- Line {} --> {}\n", src_node.mermaid_id(), self.line, dest_node.mermaid_id()); - edge_str - } - - pub fn render_edge_style(&self) -> String { - let style_str = format!("stroke:{},stroke-width:4px;", self.color()); - style_str - } } #[derive(Debug, Default, Clone)] @@ -426,16 +397,16 @@ impl MermaidGraphElements { self.edges.insert(edge_key, edge); } - fn render_edges(&self) -> String { - let mut all_edges = Vec::::new(); - let mut all_edges_style = Vec::::new(); - for (idx, (_, edge)) in self.edges.iter().enumerate() { - all_edges.push(edge.render_edge_definition(&self.subgraphs)); - all_edges_style.push(format!("\tlinkStyle {} {}", idx, edge.render_edge_style())); - } - let all_edges_str = format!("{}{}", all_edges.join("\n"), all_edges_style.join("\n")); - all_edges_str - } + // fn render_edges(&self) -> String { + // let mut all_edges = Vec::::new(); + // let mut all_edges_style = Vec::::new(); + // for (idx, (_, edge)) in self.edges.iter().enumerate() { + // all_edges.push(edge.render_edge_definition(&self.subgraphs)); + // all_edges_style.push(format!("\tlinkStyle {} {}", idx, edge.render_edge_style())); + // } + // let all_edges_str = format!("{}{}", all_edges.join("\n"), all_edges_style.join("\n")); + // all_edges_str + // } fn render_subgraphs(&self, review: &Review) -> String { format!("{}\n{}", @@ -458,4 +429,51 @@ impl MermaidGraphElements { let all_elements_str = format!("{}\n{}", &self.render_subgraphs(review), &self.render_edges()); all_elements_str } + + fn render_edges(&self) -> String { + let mut edge_defs = Vec::::new(); + let mut default_edge_styles = Vec::::new(); + let mut green_edge_styles = Vec::::new(); + let mut red_edge_styles = Vec::::new(); + let mut yellow_edge_styles = Vec::::new(); + for (_, edge) in &self.edges { + let src_node_id = self.subgraphs[edge.src_subgraph_key()].nodes()[edge.src_func_key()].mermaid_id(); + let dest_node_id = self.subgraphs[edge.dest_subgraph_key()].nodes()[edge.dest_func_key()].mermaid_id(); + let edge_def_str = format!("\t{} ==\"Line {}\" =====>{}", src_node_id, edge.line(), dest_node_id); + edge_defs.push(edge_def_str); + match edge.color().as_str() { + "red" => red_edge_styles.push((edge_defs.len() - 1).to_string()), + "green" => green_edge_styles.push((edge_defs.len() - 1).to_string()), + "yellow" => yellow_edge_styles.push((edge_defs.len() - 1).to_string()), + "" | _ => default_edge_styles.push((edge_defs.len() - 1).to_string()) + } + } + if !edge_defs.is_empty() { + let default_edges_str = match default_edge_styles.is_empty() { + true => "".to_string(), + false => format!("\tlinkStyle {} stroke-width:1", default_edge_styles.join(",")) + }; + let green_edges_str = match green_edge_styles.is_empty() { + true => "".to_string(), + false => format!("\tlinkStyle {} stroke:green,stroke-width:8", green_edge_styles.join(",")) + }; + let red_edges_str = match red_edge_styles.is_empty() { + true => "".to_string(), + false => format!("\tlinkStyle {} stroke:red,stroke-width:10", red_edge_styles.join(",")) + }; + let yellow_edges_str = match yellow_edge_styles.is_empty() { + true => "".to_string(), + false => format!("\tlinkStyle {} stroke:#ffe302,stroke-width:10", yellow_edge_styles.join(",")) + }; + return format!("{}\n{}\n{}\n{}\n{}", + edge_defs.join("\n"), + &default_edges_str, + &green_edges_str, + &red_edges_str, + &yellow_edges_str + ); + } + + return "".to_string(); + } } diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index 55e107ee..fe0fe1f9 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -12,7 +12,16 @@ pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Rev } let flowchart_content = flowchart_content_res.expect("Empty flowchart_content_res"); let flowchart_str = format!( - "%%{{init: {{\"flowchart\": {{\"htmlLabels\": false}}}} }}%%\nflowchart LR\n{}\n", + "%%{{init: {{ \ + 'theme': 'neutral', \ + 'themeVariables': {{ \ + 'fontSize': '20px' \ + }}, \ + 'flowchart': {{ \ + 'nodeSpacing': 100, \ + 'rankSpacing': 100 \ + }} \ + }} }}%%\n{}", &flowchart_content ); return Some(flowchart_str); From cfe569ff926eb4da7659b0782f1471ab60872eb6 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Fri, 6 Sep 2024 08:49:14 +0530 Subject: [PATCH 27/43] misc fixes and todos --- vibi-dpu/src/core/review.rs | 2 +- vibi-dpu/src/graph/elements.rs | 8 +++----- vibi-dpu/src/graph/file_imports.rs | 9 +++++++-- vibi-dpu/src/graph/graph_edges.rs | 1 - 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/vibi-dpu/src/core/review.rs b/vibi-dpu/src/core/review.rs index 5b0887c8..49ccaa92 100644 --- a/vibi-dpu/src/core/review.rs +++ b/vibi-dpu/src/core/review.rs @@ -231,7 +231,7 @@ fn create_and_save_github_review_object(deserialized_data: &Value) -> Option) -> Option Date: Fri, 6 Sep 2024 09:18:13 +0530 Subject: [PATCH 28/43] add edge link --- vibi-dpu/src/graph/elements.rs | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs index 0cc5ea81..9fbee4e3 100644 --- a/vibi-dpu/src/graph/elements.rs +++ b/vibi-dpu/src/graph/elements.rs @@ -424,11 +424,11 @@ impl MermaidGraphElements { } pub fn render_elements(&self, review: &Review) -> String { - let all_elements_str = format!("{}\n{}", &self.render_subgraphs(review), &self.render_edges()); + let all_elements_str = format!("{}\n{}", &self.render_subgraphs(review), &self.render_edges(review)); all_elements_str } - fn render_edges(&self) -> String { + fn render_edges(&self, review: &Review) -> String { let mut edge_defs = Vec::::new(); let mut default_edge_styles = Vec::::new(); let mut green_edge_styles = Vec::::new(); @@ -437,7 +437,33 @@ impl MermaidGraphElements { for (_, edge) in &self.edges { let src_node_id = self.subgraphs[edge.src_subgraph_key()].nodes()[edge.src_func_key()].mermaid_id(); let dest_node_id = self.subgraphs[edge.dest_subgraph_key()].nodes()[edge.dest_func_key()].mermaid_id(); - let edge_def_str = format!("\t{} ==\"Line {}\" =====>{}", src_node_id, edge.line(), dest_node_id); + let file_hash = sha256::digest(edge.src_subgraph_key()); + let edge_link_str = match edge.color().as_str() { + "red" => format!("https://github.com/{}/{}/pull/{}/files#diff-{}L{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + edge.line() + ), + "green" | "yellow" => format!("https://github.com/{}/{}/pull/{}/files#diff-{}R{}", + review.repo_owner(), + review.repo_name(), + review.id(), + &file_hash, + edge.line() + ), + "" | _ => format!("https://github.com/{}/{}/blob/{}/{}#L{}", + review.repo_owner(), + review.repo_name(), + review.base_head_commit(), + edge.src_subgraph_key(), + edge.line() + ) + }; + let edge_def_str = format!( + "\t{} ==\"Line {}\" =====>{}", + src_node_id, edge_link_str, edge.line(), dest_node_id); edge_defs.push(edge_def_str); match edge.color().as_str() { "red" => red_edge_styles.push((edge_defs.len() - 1).to_string()), From 9fe33f9ca42d9e77a523e0a0579ab7e2573487bc Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Mon, 16 Sep 2024 07:19:00 +0530 Subject: [PATCH 29/43] fixed adding func calls for hunks --- vibi-dpu/src/core/diff_graph.rs | 2 +- vibi-dpu/src/db/graph_info.rs | 8 +- vibi-dpu/src/graph/elements.rs | 10 +- vibi-dpu/src/graph/file_imports.rs | 35 ++- vibi-dpu/src/graph/function_call.rs | 3 + vibi-dpu/src/graph/function_line_range.rs | 24 +- vibi-dpu/src/graph/gitops.rs | 46 +-- vibi-dpu/src/graph/graph_edges.rs | 13 +- vibi-dpu/src/graph/graph_info.rs | 325 ++++++---------------- vibi-dpu/src/graph/mermaid_elements.rs | 241 +--------------- vibi-dpu/src/graph/utils.rs | 56 ++-- 11 files changed, 199 insertions(+), 564 deletions(-) diff --git a/vibi-dpu/src/core/diff_graph.rs b/vibi-dpu/src/core/diff_graph.rs index 491d1d99..e4e66c52 100644 --- a/vibi-dpu/src/core/diff_graph.rs +++ b/vibi-dpu/src/core/diff_graph.rs @@ -16,7 +16,7 @@ pub async fn send_diff_graph(review: &Review, excluded_files: &Vec, sm } async fn diff_graph_comment_text(excluded_files: &Vec, small_files: &Vec, review: &Review) -> String { - let mut comment = "Relevant users for this PR:\n\n".to_string(); + let mut comment = "Diff Graph:\n\n".to_string(); let all_diff_files: Vec = excluded_files .iter() diff --git a/vibi-dpu/src/db/graph_info.rs b/vibi-dpu/src/db/graph_info.rs index 8b349574..1a700f7e 100644 --- a/vibi-dpu/src/db/graph_info.rs +++ b/vibi-dpu/src/db/graph_info.rs @@ -1,7 +1,7 @@ use sled::IVec; -use crate::{db::config::get_db, graph::file_imports::AllFileImportInfo}; -pub fn save_import_info_to_db(review_key: &str, commit_id: &str, all_imports: &AllFileImportInfo) { +use crate::{db::config::get_db, graph::file_imports::FilesImportInfo}; +pub fn save_import_info_to_db(review_key: &str, commit_id: &str, all_imports: &FilesImportInfo) { let db = get_db(); let graph_info_key = format!("graph_info/{}/{}", review_key, commit_id); // Serialize repo struct to JSON @@ -16,7 +16,7 @@ pub fn save_import_info_to_db(review_key: &str, commit_id: &str, all_imports: &A log::debug!("[save_graph_info_to_db] Graph Info succesfully upserted: {:#?}", all_imports); } -pub fn get_import_info_from_db(review_key: &str, commit_id: &str) -> Option { +pub fn get_import_info_from_db(review_key: &str, commit_id: &str) -> Option { let db = get_db(); let graph_info_key = format!("graph_info/{}/{}", review_key, commit_id); let graph_info_res = db.get(IVec::from(graph_info_key.as_bytes())); @@ -40,6 +40,6 @@ pub fn get_import_info_from_db(review_key: &str, commit_id: &str) -> Option, +} + +impl ImportPaths { + pub fn imports(&self) -> &Vec { + &self.imports + } +} + #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct ChunkImportInfo { import_lines: FileImportLines, @@ -92,11 +103,11 @@ impl FileImportInfo { } #[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct AllFileImportInfo { +pub struct FilesImportInfo { file_import_map: HashMap } -impl AllFileImportInfo { +impl FilesImportInfo { pub fn files(&self) -> Vec<&String> { self.file_import_map.keys().collect() } @@ -114,7 +125,7 @@ impl AllFileImportInfo { } } -pub async fn get_import_lines(file_paths: &Vec) -> Option { +pub async fn get_import_lines(file_paths: &Vec) -> Option { let mut all_import_info = HashMap::::new(); let system_prompt_opt = read_file("/app/prompts/prompt_import_lines"); if system_prompt_opt.is_none() { @@ -138,7 +149,7 @@ pub async fn get_import_lines(file_paths: &Vec) -> Option::new(); for chunk in chunks { @@ -164,7 +175,7 @@ pub async fn get_import_lines(file_paths: &Vec) -> Option Option { @@ -175,7 +186,7 @@ async fn get_import_lines_chunk(system_prompt_lines: &str, chunk_str: &str, file chunk: chunk_str.to_string() } }; let llm_req_res = serde_json::to_string(&llm_req); if llm_req_res.is_err() { - log::error!("[get_function_defs_in_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + log::error!("[get_import_lines_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); return None; } let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); @@ -201,7 +212,7 @@ async fn get_import_lines_chunk(system_prompt_lines: &str, chunk_str: &str, file } async fn get_import_path_file(numbered_content: &Vec, import_line: FileImportLines, system_prompt: &str, file_path: &str) -> Option> { - let mut import_paths = Vec::::new(); + let mut import_paths = Vec::::new(); // get import lines from numbered lines let import_lines_str_opt = numbered_import_lines(numbered_content, import_line); if import_lines_str_opt.is_none() { @@ -238,7 +249,7 @@ async fn get_import_path_file(numbered_content: &Vec, import_line: FileI import_res.expect_err("Empty error in funcdefs_res")); continue; } - let import_path: ImportPath = import_res.expect("Uncaught error in funcdefs_res"); + let import_path: ImportPaths = import_res.expect("Uncaught error in funcdefs_res"); import_paths.push(import_path); } } @@ -246,7 +257,11 @@ async fn get_import_path_file(numbered_content: &Vec, import_line: FileI if import_paths.is_empty() { return None; } - return Some(import_paths); + let import_path_vec: Vec = import_paths + .iter() + .flat_map(|ip| ip.imports.iter().cloned()) + .collect(); + return Some(import_path_vec); } fn numbered_import_lines(numbered_content: &Vec, import_line: FileImportLines) -> Option>{ diff --git a/vibi-dpu/src/graph/function_call.rs b/vibi-dpu/src/graph/function_call.rs index 33c746ea..bf525c8f 100644 --- a/vibi-dpu/src/graph/function_call.rs +++ b/vibi-dpu/src/graph/function_call.rs @@ -10,6 +10,9 @@ pub struct FunctionCallChunk { } impl FunctionCallChunk { + pub fn new(function_calls: Vec) -> Self { + Self { function_calls } + } pub fn function_calls(&self) -> &Vec { &self.function_calls } diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs index ce7b90cd..b96049ea 100644 --- a/vibi-dpu/src/graph/function_line_range.rs +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -26,12 +26,12 @@ impl FuncDefInfo { &self.name } - pub fn line_start(&self) -> usize { - self.line_start + pub fn line_start(&self) -> &usize { + &self.line_start } - pub fn line_end(&self) -> usize { - self.line_end + pub fn line_end(&self) -> &usize { + &self.line_end } } @@ -117,12 +117,16 @@ struct LlmFuncDefRequest { #[derive(Debug, Serialize, Default, Deserialize, Clone)] struct LlmFuncDef { + #[serde(default)] name: String, - line_num: usize, + #[serde(default)] + line_start: usize, + #[serde(default)] parent: String } #[derive(Debug, Serialize, Default, Deserialize, Clone)] struct LlmFuncDefResponse { + #[serde(default)] functions: Vec } @@ -166,7 +170,7 @@ pub async fn generate_function_map(file_paths: &Vec) -> Option) -> Option Option< let funcdefs_res = serde_json::from_str(&llm_response); if funcdefs_res.is_err() { log::error!( - "[get_function_defs_in_chunk] funcdefs error: {}", + "[get_function_defs_in_chunk] funcdefs error: {:?}", funcdefs_res.expect_err("Empty error in funcdefs_res")); return None; } @@ -232,7 +236,7 @@ async fn get_function_defs_in_chunk(chunk: &str, system_prompt: &str) -> Option< async fn get_function_boundaries_in_chunk(file_lines_numbered: &Vec, func_def_line_num: usize, system_prompt: &str) -> Option { // divide lines into chunks and call with each chunk until line_end is found or files is empty - let chunk_size = 70; + let chunk_size = 40; let mut start = func_def_line_num; while start < file_lines_numbered.len() { diff --git a/vibi-dpu/src/graph/gitops.rs b/vibi-dpu/src/graph/gitops.rs index 464d110e..1be89b7d 100644 --- a/vibi-dpu/src/graph/gitops.rs +++ b/vibi-dpu/src/graph/gitops.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, path::PathBuf, process::Command, str::{self, FromStr}}; +use std::{collections::HashMap, path::{Path, PathBuf}, process::Command, str::{self, FromStr}}; use crate::utils::{gitops::StatItem, review::Review}; @@ -6,16 +6,15 @@ use crate::utils::{gitops::StatItem, review::Review}; pub struct HunkDiffLines { start_line: usize, end_line: usize, - content: Vec, } impl HunkDiffLines { - pub fn start_line(&self) -> usize { - self.start_line + pub fn start_line(&self) -> &usize { + &self.start_line } - pub fn end_line(&self) -> usize { - self.end_line + pub fn end_line(&self) -> &usize { + &self.end_line } } @@ -49,15 +48,13 @@ impl HunkDiffMap { self.file_line_map.keys().collect::>() } - pub fn all_files_pathbuf(&self) -> Vec { + pub fn all_files_pathbuf(&self, clone_dir: &str) -> Vec { + let base_path = Path::new(clone_dir); self.file_line_map.keys() .filter_map(|s| { - // Try to convert each &str to a PathBuf - let s_pathbuf_res = PathBuf::from_str(s); - match s_pathbuf_res { - Ok(pathbuf) => Some(pathbuf), - Err(_) => None, - } + let relative_path = Path::new(s); + let abs_filepath = base_path.join(relative_path); + Some(abs_filepath) }) .collect::>() } @@ -100,8 +97,6 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu let diffstr = diffstr_res.expect("Uncaught error in diffstr_res"); log::debug!("[extract_hunks] diffstr = {}", &diffstr); - let mut current_add_content = Vec::new(); - let mut current_del_content = Vec::new(); let mut current_add_start = 0; let mut current_del_start = 0; let mut current_add_end = 0; @@ -117,21 +112,17 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu file_hunks.added_hunks.push(HunkDiffLines { start_line: current_add_start, end_line: current_add_end, - content: current_add_content.clone(), }); } if in_del_hunk { file_hunks.deleted_hunks.push(HunkDiffLines { start_line: current_del_start, end_line: current_del_end, - content: current_del_content.clone(), }); } // Reset states for next hunk in_add_hunk = false; in_del_hunk = false; - current_add_content.clear(); - current_del_content.clear(); let parts: Vec<&str> = line.split_whitespace().collect(); if parts.len() > 2 { @@ -154,14 +145,6 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu } } } - } else if line.starts_with('-') { - if in_del_hunk { - current_del_content.push(line[1..].to_string()); - } - } else if line.starts_with('+') { - if in_add_hunk { - current_add_content.push(line[1..].to_string()); - } } } @@ -170,18 +153,19 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu file_hunks.added_hunks.push(HunkDiffLines { start_line: current_add_start, end_line: current_add_end, - content: current_add_content.clone(), }); } if in_del_hunk { file_hunks.deleted_hunks.push(HunkDiffLines { start_line: current_del_start, end_line: current_del_end, - content: current_del_content.clone(), }); } - - file_hunk_map.file_line_map.insert(filepath.to_string(), file_hunks); + let abs_filepath = Path::new(review.clone_dir()); + let abs_file_pathbuf = abs_filepath.join(Path::new(filepath)); + file_hunk_map.file_line_map.insert( + abs_file_pathbuf.to_str().expect("Unable to deserialize pathbuf").to_string(), + file_hunks); } return file_hunk_map; diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index 1c710ff7..298d76a9 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -1,14 +1,14 @@ use std::{path::{Path, PathBuf}, str::FromStr}; use crate::utils::{gitops::git_checkout_commit, review::Review}; -use super::{elements::MermaidGraphElements, file_imports::{AllFileImportInfo, ImportPath}, function_call::function_calls_in_file, function_line_range::{generate_function_map, FuncDefInfo, FunctionFileMap}, graph_info::DiffGraph, utils::match_overlap}; +use super::{elements::MermaidGraphElements, file_imports::{FilesImportInfo, ImportPath}, function_call::function_calls_in_file, function_line_range::{generate_function_map, FuncDefInfo, FunctionFileMap}, graph_info::DiffGraph, utils::match_overlap}; -pub async fn graph_edges(review: &Review, all_import_info: &AllFileImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { - outgoing_edges(diff_graph, graph_elems).await; +pub async fn graph_edges(review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + outgoing_edges(diff_graph, graph_elems, review).await; incoming_edges(review, all_import_info, diff_graph, graph_elems).await; } -async fn incoming_edges(review: &Review, all_import_info: &AllFileImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { +async fn incoming_edges(review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { for (dest_filename, func_defs) in diff_graph.diff_func_defs() { for dest_func in func_defs.added_func_defs() { git_checkout_commit(review, review.pr_head_commit()); @@ -154,7 +154,7 @@ fn match_import_condition(dest_filename: &str, import_obj: &ImportPath, dest_fun 0.5) } -async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { +async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, review: &Review) { for (source_filename, func_calls) in diff_graph.diff_func_calls() { for source_func_call in func_calls.added_calls() { let dest_filename = source_func_call.import_info().import_path(); @@ -185,7 +185,8 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl } } // search in full graph - let dest_filepath = PathBuf::from_str(dest_filename).expect("Unable to get path"); + let mut dest_filepath = PathBuf::from_str(review.clone_dir()).expect("Unable to get path"); + dest_filepath.push(dest_filename); if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath]).await { // identify this particular func if let Some(func_defs) = all_file_funcdefs.functions_in_file(dest_filename) { diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs index 49a87f69..79a76379 100644 --- a/vibi-dpu/src/graph/graph_info.rs +++ b/vibi-dpu/src/graph/graph_info.rs @@ -1,77 +1,6 @@ -use std::{collections::HashMap, path::PathBuf}; - -use serde::{Deserialize, Serialize}; - -use crate::{db::graph_info::{get_import_info_from_db, save_import_info_to_db}, graph::{file_imports::get_import_lines, function_line_range::generate_function_map, utils::all_code_files}, utils::{gitops::StatItem, review::Review}}; - -use super::{file_imports::{AllFileImportInfo, ImportPath}, function_call::{function_calls_in_file, FunctionCallChunk}, function_line_range::{AllFileFunctions, FuncDefInfo}, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::source_diff_files}; - -// #[derive(Debug, Serialize, Default, Deserialize, Clone)] -// pub struct DiffInfo { -// added_funcs: Option>>, // key is filename -// deleted_funcs: Option>>, // key is filename -// added_imports: Option>>, // key is filename -// deleted_imports: Option>> // key is filename -// } - -// impl DiffInfo { -// pub fn added_funcs(&self) -> &Option>> { -// &self.added_funcs -// } - -// pub fn deleted_funcs(&self) -> &Option>> { -// &self.deleted_funcs -// } - -// pub fn added_imports(&self) -> &Option>> { -// &self.added_imports -// } - -// pub fn deleted_imports(&self) -> &Option>> { -// &self.deleted_imports -// } -// } - -// async fn generate_graph_info(source_file_paths: &Vec) -> Option { -// // let function_map_opt = generate_function_map(source_file_paths).await; -// // if function_map_opt.is_none() { -// // log::error!("[generate_graph_info] Unable to generate function map"); -// // return None; -// // } -// // let function_map = function_map_opt.expect("Empty function_map_opt"); -// // log::debug!("[generate_graph_info] func map = {:?}", &function_map); -// let all_file_import_info_opt = get_import_lines(source_file_paths).await; -// if all_file_import_info_opt.is_none() { -// log::error!("[generate_graph_info] Unable to get import info for source files: {:#?}", source_file_paths); -// return None; -// } -// let all_file_import_info = all_file_import_info_opt.expect("Empty import_lines_opt"); -// let graph_info = GraphInfo { function_info: function_map, -// import_info: all_file_import_info }; -// return Some(graph_info); -// } - -// pub async fn generate_full_graph(repo_dir: &str, review_key: &str, commit_id: &str) -> Option { -// // check for graph db -// if let Some(graph_info) = get_import_info_from_db(review_key, commit_id) { -// return Some(graph_info); -// } -// let repo_code_files_opt = all_code_files(repo_dir); -// if repo_code_files_opt.is_none() { -// log::error!("[generate_full_graph] Unable to get file paths: {}", repo_dir); -// return None; -// } -// let repo_code_files = repo_code_files_opt.expect("Empty repo_code_files_opt"); -// let graph_info_opt = generate_graph_info(&repo_code_files).await; -// if graph_info_opt.is_none() { -// log::error!("[generate_full_graph] Unable to generate full graph for commit: {}", commit_id); -// return None; -// } -// let graph_info = graph_info_opt.expect("Empty graph_info_opt"); -// // save all this to db -// save_import_info_to_db(review_key, commit_id, &graph_info); -// return Some(graph_info); -// } +use std::collections::HashMap; +use crate::{graph::{file_imports::get_import_lines, function_line_range::generate_function_map}, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; +use super::{file_imports::{FilesImportInfo, ImportPath}, function_call::{function_calls_in_file, FunctionCallChunk}, function_line_range::{AllFileFunctions, FuncDefInfo}, gitops::{get_changed_hunk_lines, HunkDiffLines, HunkDiffMap}, utils::source_diff_files}; #[derive(Debug, Default, Clone)] pub struct DiffFuncDefs { @@ -109,6 +38,22 @@ impl FuncCall { pub fn call_info(&self) -> &Vec { &self.call_info } + + pub fn func_call_hunk_lines(&self, hunk_diff: &HunkDiffLines) -> Option { + let mut hunk_func_calls_lines = Vec::::new(); + for func_call in self.call_info() { + for call_line in func_call.function_calls() { + if hunk_diff.start_line() <= call_line && hunk_diff.end_line() >= call_line { + hunk_func_calls_lines.push(call_line.to_owned()); + } + } + } + if hunk_func_calls_lines.is_empty() { + return None; + } + let hunk_func_call = FuncCall{import_info: self.import_info.clone(), call_info: vec![FunctionCallChunk::new(hunk_func_calls_lines)]}; + return Some(hunk_func_call); + } } #[derive(Debug, Default, Clone)] @@ -138,7 +83,7 @@ impl DiffFuncCall { #[derive(Debug, Default, Clone)] pub struct DiffGraph { diff_files_func_defs: AllFileFunctions, - diff_files_imports: AllFileImportInfo, + diff_files_imports: FilesImportInfo, diff_func_defs: HashMap, diff_func_calls: HashMap } @@ -156,7 +101,7 @@ impl DiffGraph { &self.diff_files_func_defs } - pub fn all_file_imports(&self) -> &AllFileImportInfo { + pub fn all_file_imports(&self) -> &FilesImportInfo { &self.diff_files_imports } @@ -169,7 +114,7 @@ impl DiffGraph { } } -pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> Option { +pub async fn generate_diff_graph(diff_files: &Vec, review: &Review, base_commit_import_info: &FilesImportInfo) -> Option { let diff_code_files_opt = source_diff_files(diff_files); if diff_code_files_opt.is_none() { log::debug!("[generate_diff_graph] No relevant source diff files in: {:#?}", diff_files); @@ -177,55 +122,47 @@ pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> } let diff_code_files = diff_code_files_opt.expect("Empty diff_code_files_opt"); let hunk_diff_map = get_changed_hunk_lines(&diff_code_files, review); - let diff_graph_opt = process_hunk_diff(&hunk_diff_map).await; + // get func defs for base commit for files in diff + log::debug!("[generate_diff_graph] hunk diff map =======~~~~~~~~ {:#?}", &hunk_diff_map); + let diff_graph_opt = process_hunk_diff(&hunk_diff_map, base_commit_import_info, review).await; return diff_graph_opt; - // let diff_code_files_pathbuf: Vec = diff_code_files - // .iter() - // .filter_map(|s| { - // // Try to convert each &str to a PathBuf - // let s_pathbuf_res = PathBuf::from_str(&s.filepath); - // match s_pathbuf_res { - // Ok(pathbuf) => Some(pathbuf), - // Err(_) => None, - // } - // }) - // .collect(); - // let graph_info_opt = generate_graph_info(&diff_code_files_pathbuf).await; - // if graph_info_opt.is_none() { - // log::error!("[generate_diff_graph] Unable to generate diff graph"); - // return (None, deleted_files_opt); - // } - // let graph_info = graph_info_opt.expect("Empty graph_info_opt"); - // // return (Some(graph_info), deleted_files_opt); - // return None; } -async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap) -> Option { - let all_files = hunk_diff_map.all_files_pathbuf(); - let all_file_func_defs_opt = generate_function_map(&all_files).await; - let all_file_imports_opt = get_import_lines(&all_files).await; +async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, base_commit_import_info: &FilesImportInfo,review: &Review) -> Option { + // full graph func def and import info for diff selected files is required. + let all_diff_files = hunk_diff_map.all_files_pathbuf(review.clone_dir()); + let base_commit_func_defs_opt = generate_function_map(&all_diff_files).await; + if base_commit_func_defs_opt.is_none() { + log::debug!("[process_hunk_diff] Unable to generate func defs for base commit"); + return None; + } + let base_commit_func_defs = base_commit_func_defs_opt.expect("Empty let base_commit_func_defs_opt"); + git_checkout_commit(review, &review.pr_head_commit()); + let diff_func_defs_opt = generate_function_map(&all_diff_files).await; + let diff_imports_opt = get_import_lines(&all_diff_files).await; // TODO FIXME - opt logic - if all_file_func_defs_opt.is_none() { + if diff_func_defs_opt.is_none() { log::debug!("[process_hunk_diff] Unable to generate func definitions diff map"); return None; } - if all_file_imports_opt.is_none() { + if diff_imports_opt.is_none() { log::debug!("[process_hunk_diff] Unable to generate func imports diff map"); return None; } - let all_file_func_defs = all_file_func_defs_opt.expect("Empty all_file_func_defs_opt)"); - let all_file_imports = all_file_imports_opt.expect("Empty all_file_imports_opt"); + let diff_files_func_defs = diff_func_defs_opt.expect("Empty all_file_func_defs_opt)"); + let diff_files_imports = diff_imports_opt.expect("Empty all_file_imports_opt"); let mut diff_graph = DiffGraph { - diff_files_func_defs: all_file_func_defs, - diff_files_imports: all_file_imports, + diff_files_func_defs, + diff_files_imports, diff_func_defs: HashMap::new(), diff_func_calls: HashMap::new(), }; - for filepath in all_files { + let mut diff_func_calls_map: HashMap = HashMap::new(); + for filepath in &all_diff_files { let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); let mut diff_func_defs = DiffFuncDefs { added_func_defs: Vec::new(), deleted_func_defs: Vec::new()}; - let mut diff_func_calls = DiffFuncCall { + let mut diff_func_calls_add = DiffFuncCall { added_calls: Vec::new(), deleted_calls: Vec::new()}; if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { for hunk_diff in file_line_map.added_hunks() { @@ -238,8 +175,8 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap) -> Option { } } } - for hunk_diff in file_line_map.deleted_hunks() { - if let Some(funcs_map) = diff_graph.all_file_func_defs().functions_in_file(filename) { + for hunk_diff in file_line_map.deleted_hunks() { + if let Some(funcs_map) = base_commit_func_defs.functions_in_file(filename) { // find func_defs for files in hunks let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); if !funcs_def_vec.is_empty() { @@ -248,138 +185,56 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap) -> Option { } } } + // TODO FIXME - why no deleted func calls, and how is only diff part sent to find func calls? // find func call in hunks for each import + // want to record not all func_calls but hunk specific line numbers + // might need to reorder for loops to make sure repeated calcs are avoided if let Some(imports_info) = diff_graph.all_file_imports().file_import_info(filename) { for import_info in imports_info.all_import_paths() { + // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { - // add these func calls to something with file as key - let func_call = FuncCall{ import_info, call_info: func_calls }; - diff_func_calls.add_added_calls(func_call); + // func_calls is basically all func calls of a function in the latest commit of the file + if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + let func_call = FuncCall{ import_info, call_info: func_calls }; + for hunk_diff in file_line_map.added_hunks() { + if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { + diff_func_calls_add.add_added_calls(hunk_func_call); + } + } + } } } } + // Use full graph's import info + // do a git checkout to base commit + // do the same thing as done for added_calls } diff_graph.add_func_def(filename.to_string(), diff_func_defs); - diff_graph.add_diff_func_calls(filename.to_string(), diff_func_calls); + diff_func_calls_map.insert(filename.to_string(), diff_func_calls_add); + } + git_checkout_commit(review, &review.base_head_commit()); + for filepath in &all_diff_files { + let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); + let diff_func_call_del = diff_func_calls_map.entry(filename.to_string()).or_insert(DiffFuncCall { added_calls: Vec::new(), deleted_calls: Vec::new() }); + if let Some(imports_info) = base_commit_import_info.file_import_info(filename) { + for import_info in imports_info.all_import_paths() { + // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls + if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { + // func_calls is basically all func calls of a function in the latest commit of the file + if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + let func_call = FuncCall{ import_info, call_info: func_calls }; + for hunk_diff in file_line_map.deleted_hunks() { + if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { + diff_func_call_del.add_deleted_calls(hunk_func_call); + } + } + } + } + } + } + } + for (filename, diff_func_call) in diff_func_calls_map.iter() { + diff_graph.add_diff_func_calls(filename.to_owned(), diff_func_call.to_owned()); } return Some(diff_graph); -} - -// fn added_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { -// let mut added_funcs = HashMap::>::new(); -// for filename in diff_graph.function_info().all_files() { -// let func_map_opt = full_graph.function_info().functions_in_file(filename); -// if func_map_opt.is_none() { -// if let Some(diff_func_map) = diff_graph.function_info().functions_in_file(filename) { -// let funcs_vec = diff_func_map.functions().to_owned(); -// added_funcs.entry(filename.to_string()) -// .or_insert_with(Vec::new) -// .extend(funcs_vec); -// } -// } else { -// let full_func_map = func_map_opt.expect("Empty func_map_opt"); -// if let Some(diff_func_map) = diff_graph.function_info().functions_in_file(filename) { -// for func in diff_func_map.functions() { -// if !full_func_map.is_func_in_file(func) { -// added_funcs.entry(filename.to_string()) -// .or_insert_with(Vec::new) -// .push(func.to_owned()); -// } -// } -// } -// } -// } -// if added_funcs.is_empty() { -// return None; -// } -// return Some(added_funcs); -// } - -// fn deleted_functions_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { -// let mut deleted_funcs = HashMap::>::new(); -// for filename in diff_graph.function_info().all_files() { -// // TODO - full file deleted? -// let funcs_opt = full_graph.function_info().functions_in_file(filename); -// if funcs_opt.is_none() { -// // file added -// } -// let full_funcs = funcs_opt.expect("Empty funcs_opt"); -// let diff_funcs = diff_graph.function_info().functions_in_file(filename).expect("Empty diff_funcs"); -// for func in full_funcs.functions() { -// if diff_funcs.is_func_in_file(func) { -// deleted_funcs.entry(filename.to_string()) -// .or_insert_with(Vec::new) -// .push(func.to_owned()); -// } -// } -// } -// if deleted_funcs.is_empty() { -// return None; -// } -// return Some(deleted_funcs) -// } - -// fn added_imports_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { -// let mut added_imports = HashMap::>::new(); -// for filename in diff_graph.import_info().files() { -// let diff_imports = diff_graph -// .import_info() -// .file_import_info(filename).expect("Empty diff imports"); -// let full_imports_opt = full_graph -// .import_info().file_import_info(filename); -// if full_imports_opt.is_none() { -// added_imports.entry(filename.to_string()) -// .or_insert_with(Vec::new) -// .extend(diff_imports.all_import_paths()); -// } else { -// for import_path in diff_imports.all_import_paths() { -// if !full_graph.import_info().is_import_in_file(filename, &import_path) { -// added_imports.entry(filename.to_string()) -// .or_insert_with(Vec::new) -// .push(import_path); -// } -// } -// } -// } -// if added_imports.is_empty() { -// return None; -// } -// return Some(added_imports); -// } - -// fn deleted_imports_diff(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> Option>> { -// let mut deleted_imports = HashMap::>::new(); -// // TODO - file deleted -// for filename in diff_graph.import_info().files() { -// let full_imports_opt = full_graph.import_info().file_import_info(filename); -// if full_imports_opt.is_none() { -// // file added -// } -// let full_imports = full_imports_opt.expect("Empty full_imports_opt"); -// for import_path in full_imports.all_import_paths() { -// if !diff_graph.import_info().is_import_in_file(filename, &import_path) { -// deleted_imports.entry(filename.to_string()) -// .or_insert_with(Vec::new) -// .push(import_path); -// } -// } -// } -// if deleted_imports.is_empty() { -// return None; -// } -// return Some(deleted_imports); -// } - -// pub fn generate_diff_info(full_graph: &GraphInfo, diff_graph: &GraphInfo) -> DiffInfo { - // Get added funcs and imports - // let added_funcs_opt = added_functions_diff(full_graph, diff_graph); - // let deleted_funcs_opt = deleted_functions_diff(full_graph, diff_graph); - // let added_imports_opt = added_imports_diff(full_graph, diff_graph); - // let deleted_imports_opt = deleted_imports_diff(full_graph, diff_graph); - // return DiffInfo { - // added_funcs: added_funcs_opt, - // deleted_funcs: deleted_funcs_opt, - // added_imports: added_imports_opt, - // deleted_imports: deleted_imports_opt - // }; -// } \ No newline at end of file +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index fe0fe1f9..0d418e43 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -30,31 +30,23 @@ pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Rev async fn generate_flowchart_elements(diff_files: &Vec, review: &Review) -> Option { // generate full graph for base commit id git_checkout_commit(review, review.base_head_commit()); - // let full_graph_opt = generate_full_graph(&review.clone_dir(), - // &review.db_key(), &review.base_head_commit()).await; - // if full_graph_opt.is_none() { - // log::error!( - // "[generate_flowchart_elements] Unable to generate full graph for review: {}", - // review.id()); - // return None; - // } - // let full_graph = full_graph_opt.expect("Empty full_graph_opt"); - // // generate diff graph for head commit id let repo_code_files_opt = all_code_files(review.clone_dir()); if repo_code_files_opt.is_none() { log::error!( - "[generate_full_graph] Unable to get file paths: {}", review.clone_dir()); + "[generate_flowchart_elements] Unable to get file paths: {}", review.clone_dir()); return None; } let repo_code_files = repo_code_files_opt.expect("Empty repo_code_files_opt"); - let all_file_import_info_opt = get_import_lines(&repo_code_files).await; - if all_file_import_info_opt.is_none() { - log::error!("[generate_graph_info] Unable to get import info for source files: {:#?}", &repo_code_files); + let base_commit_import_info_opt = get_import_lines(&repo_code_files).await; + log::debug!("[generate_flowchart_elements] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all_file_import_info_opt = {:#?}", &base_commit_import_info_opt); + if base_commit_import_info_opt.is_none() { + log::error!("[generate_flowchart_elements] Unable to get import info for source files: {:#?}", &repo_code_files); return None; } - let all_file_import_info = all_file_import_info_opt.expect("Empty import_lines_opt"); + let base_commit_import_info = base_commit_import_info_opt.expect("Empty import_lines_opt"); git_checkout_commit(review, review.pr_head_commit()); - let diff_graph_opt = generate_diff_graph(diff_files, review).await; + let diff_graph_opt = generate_diff_graph(diff_files, review, &base_commit_import_info).await; + log::debug!("[generate_flowchart_elements] diff_graph_opt = {:#?}", &diff_graph_opt); if diff_graph_opt.is_none() { log::error!( "[generate_flowchart_elements] Unable to generate diff graph for review: {}", @@ -65,220 +57,7 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review // let diff_info = generate_diff_info(&full_graph, &diff_graph); let mut graph_elems = MermaidGraphElements::new(); git_checkout_commit(review, review.base_head_commit()); - graph_edges(review, &all_file_import_info, &diff_graph, &mut graph_elems).await; - - // let (file_lines_del_map, file_lines_add_map) = get_changed_files(diff_files, review); - // let mut subgraph_map = HashMap::::new(); - // let mut edges = MermaidEdges::new(Vec::::new()); - // let files: Vec = diff_files.iter().map(|item| item.filepath.clone()).collect(); - // for file in files.iter() { - // if file_lines_add_map.contains_key(file) { - // generate_mermaid_content( - // &mut subgraph_map, - // review, - // file, - // &file_lines_add_map, - // &mut edges, - // "green" - // ).await; - // } - // } - // git_checkout_commit(review, review.base_head_commit()); - // for file in files.iter() { - // if file_lines_del_map.contains_key(file) { - // generate_mermaid_content( - // &mut subgraph_map, - // review, - // file, - // &file_lines_del_map, - // &mut edges, - // "red" - // ).await; - // } - // } - // log::debug!("[generate_flowchart_elements] subgraph_map = {:#?}", &subgraph_map); - // Render content string + graph_edges(review, &base_commit_import_info, &diff_graph, &mut graph_elems).await; let elems_str = graph_elems.render_elements(review); - // let subgraphs_str = subgraph_map.values().map( - // |subgraph| subgraph.render_subgraph() - // ).collect::>().join("\n"); - // let edges_str = edges.render_edges(); - // let content_str = format!("{}\n{}", &subgraphs_str, &edges_str); return Some(elems_str); -} - -// async fn generate_mermaid_content( -// subgraph_map: &mut HashMap, review: &Review, file: &str, -// file_lines_map: &HashMap>, -// edges: &mut MermaidEdges, -// color: &str -// ) { -// if !file.ends_with(".rs") { -// log::debug!("[mermaid_comment] File extension not valid: {}", &file); -// return; -// } -// let file_path = format!("{}/{}", review.clone_dir(), &file); -// let file_contents_res = read_file(&file_path); -// if file_contents_res.is_none() { -// log::error!( -// "[generate_mermaid_content] Unable to read changed file content: {}", &file_path); -// return; -// } -// let file_contents = file_contents_res.expect("Empty file_contents_res"); -// let numbered_content = file_contents -// .lines() -// .enumerate() -// .map(|(index, line)| format!("{} {}", index, line)) -// .collect::>() -// .join("\n"); -// let flinemap_opt = extract_function_lines( -// &numbered_content, -// file -// ).await; -// if flinemap_opt.is_none() { -// log::debug!( -// "[generate_mermaid_content] Unable to generate function line map for file: {}", file); -// return; -// } -// let flinemap = flinemap_opt.expect("Empty flinemap_opt"); -// // deleted lines -// let called_info_del_opt = generate_called_function_info( -// file_lines_map, &numbered_content, file).await; -// if called_info_del_opt.is_none() { -// log::error!("[generate_mermaid_content] Unable to generate called functions info"); -// return; -// } -// let (called_funcs_del, called_func_paths_del) = called_info_del_opt.expect("Empty called_info_opt"); -// generate_callee_nodes(&called_func_paths_del, subgraph_map); -// generate_caller_elements( -// subgraph_map, -// &file_lines_map[file], -// &flinemap, -// &called_funcs_del, -// &called_func_paths_del, -// edges, -// &file, -// color); -// return; -// } - -// fn generate_caller_elements(subgraph_map: &mut HashMap, -// hunk_lines: &Vec<(usize, usize)>, -// flinemap: &Vec, -// called_funcs: &Vec, -// called_funcs_path: &Vec, -// edges: &mut MermaidEdges, -// filename: &str, -// color: &str) -// { -// for cf in called_funcs { -// let func_name_opt = get_func_from_line(cf.line, flinemap); -// if func_name_opt.is_none() { -// log::debug!("[generate_caller_elements] Unable to get func name for line: {:?}", cf.line); -// continue; -// } -// let func_name = func_name_opt.expect("Empty func_name_opt"); -// let caller_node; - -// // Borrow subgraph_map mutably to either retrieve or insert the subgraph -// let subgraph = subgraph_map.entry(filename.to_string()).or_insert_with(|| { -// MermaidSubgraph::new(filename.to_string(), HashMap::new()) -// }); - -// // Borrow subgraph mutably to either retrieve or insert the node -// if let Some(node) = subgraph.nodes().get(&func_name) { -// caller_node = node.to_owned(); -// } else { -// caller_node = MermaidNode::new(func_name.clone()); -// subgraph.add_node(caller_node.clone()); -// } - -// log::debug!("[generate_caller_elements] subgraph_map = {:#?}", subgraph_map); - -// for cfp in called_funcs_path { -// if cf.name == cfp.function_name { -// // Ensure we do not have an immutable borrow of subgraph_map while we borrow it immutably here -// if let Some(import_subgraph) = subgraph_map.get(&cfp.import_path) { -// if let Some(called_node) = import_subgraph.nodes().get(&cf.name) { -// edges.add_edge(MermaidEdge::new( -// cf.line, -// caller_node.clone(), -// called_node.to_owned(), -// color.to_string() -// )); -// } -// } -// } -// } -// log::debug!("[generate_caller_elements] edges = {:#?}", &edges); -// } -// } - - -// fn get_func_from_line(line: usize, flinemaps: &[FunctionLineMap]) -> Option { -// for flinemap in flinemaps { -// log::debug!("[get_func_from_line] flinemap = {:#?}, line: {}", &flinemap, line); -// log::debug!( -// "[get_func_from_line] condition = {:?}", -// (flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32)); -// if flinemap.line_start <= line as i32 && flinemap.line_end >= line as i32 { -// log::debug!("[get_func_from_line] inside if"); -// return Some(flinemap.name.to_string()); -// } -// } -// return None; -// } - -// fn generate_callee_nodes( -// called_funcs_path: &[CalledFunctionPath], -// subgraph_map: &mut HashMap) -// { -// for cfp in called_funcs_path { -// if let Some(subgraph) = subgraph_map.get_mut(&cfp.import_path) { -// subgraph.add_node( -// MermaidNode::new(cfp.function_name.to_string()) -// ); -// } else { -// // Create new subgraph -// // Create new node -// // Add to subgraph_map -// let mut node_map = HashMap::::new(); -// node_map.insert(cfp.function_name.to_string(), MermaidNode::new(cfp.function_name.to_string())); -// let subgraph = MermaidSubgraph::new( -// cfp.import_path.to_string(), -// node_map -// ); -// subgraph_map.insert(cfp.import_path.to_string(), subgraph); -// } -// } -// return; -// } - -// async fn generate_called_function_info(file_lines_map: &HashMap>, -// numbered_content: &str, filename: &str -// ) -// -> Option<(Vec, Vec)> -// { -// let del_lines = &file_lines_map[filename]; -// let called_funcs_opt = extract_function_calls( -// del_lines, -// &numbered_content, -// filename -// ).await; -// if called_funcs_opt.is_none() { -// log::error!("[generate_called_function_info] Unable to get called functions for file: {}", filename); -// return None; -// } -// let called_funcs = called_funcs_opt.expect("Empty called_funcs_opt"); -// let called_func_paths_opt = extract_function_import_path( -// &called_funcs, -// &numbered_content, -// filename -// ).await; -// if called_func_paths_opt.is_none() { -// log::error!("[generate_called_function_info] Unable to get called function paths for file: {}", filename); -// return None; -// } -// let called_func_paths = called_func_paths_opt.expect("Empty called_func_paths_opt"); -// return Some((called_funcs, called_func_paths)); -// } \ No newline at end of file +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/utils.rs b/vibi-dpu/src/graph/utils.rs index 8302df21..aec31d51 100644 --- a/vibi-dpu/src/graph/utils.rs +++ b/vibi-dpu/src/graph/utils.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, path::{Path, PathBuf}, slice::Chunks}; use futures_util::StreamExt; use serde::{Deserialize, Serialize}; -use serde_json::json; +use serde_json::{json, Value}; use strsim::jaro_winkler; use walkdir::WalkDir; use std::fs; @@ -21,7 +21,7 @@ struct LlmResponse { pub async fn call_llm_api(prompt: String) -> Option { let client = get_client(); - let url = "http://host.docker.internal:11434/api/generate"; + let url = "http://34.100.208.132/api/generate"; log::debug!("[call_llm_api] Prompt = {:?}", &prompt); let response_res = client.post(url) .json(&json!({"model": "phind-codellama", "prompt": prompt})) @@ -52,17 +52,30 @@ pub async fn call_llm_api(prompt: String) -> Option { start = end + 1; } + // Process each chunk for chunk in chunks { - let parsed_chunk_res = serde_json::from_str(&chunk); + // Attempt to fix incomplete chunks + let fixed_chunk = if !chunk.starts_with("{") { + format!("{{{}", chunk) + } else if !chunk.ends_with("}") { + format!("{}{}", chunk, "}") + } else { + chunk.to_string() + }; + let parsed_chunk_res = serde_json::from_str::(&fixed_chunk); if parsed_chunk_res.is_err() { let e = parsed_chunk_res.expect_err("Empty error in parsed_chunk_res"); log::error!("[call_llm_api] Unable to deserialize {}: {:?}", chunk, e); continue; } - let parsed_chunk: LlmResponse = parsed_chunk_res.expect("Uncaught error in parsed_chunk_res"); - final_response.push_str(&parsed_chunk.response); - if parsed_chunk.done { - break; + let parsed_chunk = parsed_chunk_res.expect("Uncaught error in parsed_chunk_res"); + if let Some(parsed_response) = parsed_chunk.get("response").and_then(|v| v.as_str()){ + final_response.push_str(parsed_response); + } + if let Some(done_field) = parsed_chunk.get("done").and_then(|v| v.as_bool()) { + if done_field { + break; + } } } log::debug!("[call_llm_api] final_response = {:?}", &final_response); @@ -86,28 +99,6 @@ pub fn read_file(file: &str) -> Option { Some(content) } -pub fn get_specific_lines(line_numbers: Vec<(usize, usize)>, numbered_content: &str) -> String { - // Split the input content into lines and collect into a vector - let lines: Vec<&str> = numbered_content.lines().collect(); - let mut result = String::new(); - // Iterate over each line number we are interested in - for (mut start, mut end) in line_numbers { - if start > end { - let xchng = start; - start = end; - end = xchng; - } - for line_number in start..=end { - // Check if the line_number is within the bounds of the vector - if line_number < lines.len() { - result.push_str(lines[line_number]); - result.push('\n'); - } - } - } - return result; -} - pub fn generate_random_string(length: usize) -> String { const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz"; let mut rng = rand::thread_rng(); @@ -128,7 +119,10 @@ pub fn all_code_files(dir: &str) -> Option> { let ext = path.extension().and_then(|ext| ext.to_str()); log::debug!("[generate_function_map] extension = {:?}", &ext); if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { - code_files.push(path); + match path.canonicalize() { + Ok(abs_path) => code_files.push(abs_path), + Err(e) => log::error!("Failed to get absolute path for {:?}: {:?}", path, e), + } } } if code_files.is_empty() { @@ -156,7 +150,7 @@ pub fn numbered_content(file_contents: String) -> Vec { let lines = file_contents .lines() .enumerate() - .map(|(index, line)| format!("{} {}", index+1, line)) + .map(|(index, line)| format!("{} {}", index, line)) .collect::>(); return lines; } From ac70d6821858f633ca0965f5bd90317f5cdedf9b Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Wed, 18 Sep 2024 04:18:43 +0530 Subject: [PATCH 30/43] fix flowchart text and relative path --- vibi-dpu/src/core/diff_graph.rs | 2 +- vibi-dpu/src/graph/elements.rs | 24 +- vibi-dpu/src/graph/function_line_range.rs | 27 +- vibi-dpu/src/graph/graph_edges.rs | 294 ++++++++++++---------- vibi-dpu/src/graph/mermaid_elements.rs | 29 ++- vibi-dpu/src/graph/utils.rs | 37 ++- 6 files changed, 246 insertions(+), 167 deletions(-) diff --git a/vibi-dpu/src/core/diff_graph.rs b/vibi-dpu/src/core/diff_graph.rs index e4e66c52..0aafc508 100644 --- a/vibi-dpu/src/core/diff_graph.rs +++ b/vibi-dpu/src/core/diff_graph.rs @@ -26,7 +26,7 @@ async fn diff_graph_comment_text(excluded_files: &Vec, small_files: &V if let Some(mermaid_text) = mermaid_comment(&all_diff_files, review).await { comment += mermaid_text.as_str(); } - comment += "To modify DiffGraph settings, go to [your Vibinex settings page.](https://vibinex.com/settings)\n"; + comment += "\nTo modify DiffGraph settings, go to [your Vibinex settings page.](https://vibinex.com/settings)\n"; return comment; } diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs index a17d83e3..4d35d9dc 100644 --- a/vibi-dpu/src/graph/elements.rs +++ b/vibi-dpu/src/graph/elements.rs @@ -76,7 +76,7 @@ impl MermaidSubgraph { all_nodes.push(node.render_node(review, subgraph_map)); } let subgraph_str = format!( - "\tsubgraph {} [{}]\n{}\n\tend\n{}\n", + "\tsubgraph {} [\"{}\"]\n{}\n\tend\n{}\n", self.mermaid_id, self.name, all_nodes.join("\n"), @@ -179,7 +179,7 @@ impl MermaidNode { let url_str = format!("\tclick {} href \"{}\" _blank", self.mermaid_id(), self.get_node_str(review, subgraph_map)); let class_str = self.get_style_class(); - let node_str = format!("\t{}[{}]", &self.mermaid_id, &self.function_name); + let node_str = format!("\t{}[\"{}\"]", &self.mermaid_id, &self.function_name); return format!("{}\n{}\n{}", &node_str, &class_str, &url_str); } @@ -347,9 +347,9 @@ impl MermaidGraphElements { dest_color: &str, source_def_line: &usize, dest_def_line: &usize - ) { - self.create_node(source_file, source_func_name, source_color, source_def_line); - self.create_node(dest_file, dest_func_name, dest_color, dest_def_line); + ) { + self.create_or_modify_node(source_file, source_func_name, source_color, source_def_line); + self.create_or_modify_node(dest_file, dest_func_name, dest_color, dest_def_line); let edge = MermaidEdge::new( calling_line_num, source_func_name.to_string(), @@ -357,10 +357,11 @@ impl MermaidGraphElements { dest_func_name.to_string(), dest_file.to_string(), edge_color.to_string()); + log::debug!("[add_edge] edge = {:#?}", &edge); self.add_edge_to_edges(edge); } - fn create_node(&mut self, subgraph_key: &str, node_func_name: &str, node_color: &str, def_line: &usize) { + fn create_or_modify_node(&mut self, subgraph_key: &str, node_func_name: &str, node_color: &str, def_line: &usize) { if let Some(subgraph) = self.subgraphs.get_mut(subgraph_key) { if let Some(node) = subgraph.get_mut_node(node_func_name) { node.compare_and_change_color(node_color); @@ -395,17 +396,6 @@ impl MermaidGraphElements { self.edges.insert(edge_key, edge); } - // fn render_edges(&self) -> String { - // let mut all_edges = Vec::::new(); - // let mut all_edges_style = Vec::::new(); - // for (idx, (_, edge)) in self.edges.iter().enumerate() { - // all_edges.push(edge.render_edge_definition(&self.subgraphs)); - // all_edges_style.push(format!("\tlinkStyle {} {}", idx, edge.render_edge_style())); - // } - // let all_edges_str = format!("{}{}", all_edges.join("\n"), all_edges_style.join("\n")); - // all_edges_str - // } - fn render_subgraphs(&self, review: &Review) -> String { format!("{}\n{}", self.subgraphs diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs index b96049ea..d5e2baf9 100644 --- a/vibi-dpu/src/graph/function_line_range.rs +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -9,10 +9,10 @@ use super::{gitops::HunkDiffLines, utils::{all_code_files, call_llm_api, read_fi #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FuncDefInfo { - name: String, - line_start: usize, - line_end: usize, - parent: String, + pub(crate) name: String, + pub(crate) line_start: usize, + pub(crate) line_end: usize, + pub(crate) parent: String, } impl PartialEq for FuncDefInfo { @@ -33,12 +33,16 @@ impl FuncDefInfo { pub fn line_end(&self) -> &usize { &self.line_end } + + pub fn parent(&self) -> &String { + &self.parent + } } #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionFileMap { - file_name: String, - functions: Vec + pub(crate) file_name: String, + pub(crate) functions: Vec // implement a function which takes in starting and ending line numbers of a continous range // and returns the functions inside the range like Vec of ((start_line, end_line) function_name) } @@ -90,7 +94,7 @@ impl FunctionFileMap { #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct AllFileFunctions { - func_map: HashMap // file name will be key + pub(crate) func_map: HashMap // file name will be key } impl AllFileFunctions { @@ -168,7 +172,13 @@ pub async fn generate_function_map(file_paths: &Vec) -> Option) -> Option, head_filepaths: &Vec, review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + outgoing_edges(base_filepaths, head_filepaths, diff_graph, graph_elems, review).await; + incoming_edges(head_filepaths, review, all_import_info, diff_graph, graph_elems).await; } -async fn incoming_edges(review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { +async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { for (dest_filename, func_defs) in diff_graph.diff_func_defs() { for dest_func in func_defs.added_func_defs() { git_checkout_commit(review, review.pr_head_commit()); @@ -17,29 +17,32 @@ async fn incoming_edges(review: &Review, all_import_info: &FilesImportInfo, diff let file_imports = file_func_defs.all_import_paths(); for file_import in file_imports { // search for correct import - if match_import_condition(dest_filename, &file_import, dest_func) { - // find func call - let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); - if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { - // call func in that takes vec of lines and returns funcdefs - let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); - for (line_num, source_func_def) in source_func_defs { - if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("", - line_num.to_owned(), - &source_func_def.name(), - &dest_func.name(), - &source_filename, - dest_filename, - "", - "green", - source_func_def.line_start(), - dest_func.line_start() - ); + if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, &file_import.import_path()) { + if match_import_func(&file_import, dest_func) { + // find func call + let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // TODO, FIXME - function_calls_in_file should have src_filename or src_filepath? - check other calls to the function as well + if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // call func in that takes vec of lines and returns funcdefs + let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + for (line_num, source_func_def) in source_func_defs { + if source_func_def != dest_func.to_owned() { + graph_elems.add_edge("", + line_num.to_owned(), + &source_func_def.name(), + &dest_func.name(), + &source_filename, + dest_filename, + "", + "green", + source_func_def.line_start(), + dest_func.line_start() + ); + } } } - } + } } } } @@ -49,29 +52,31 @@ async fn incoming_edges(review: &Review, all_import_info: &FilesImportInfo, diff let file_imports = file_func_defs.all_import_paths(); for file_import in file_imports { // search for correct import - if match_import_condition(dest_filename, &file_import, dest_func) { - // if found, create edge - let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); - if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { - // call func in that takes vec of lines and returns funcdefs - let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); - for (line_num, source_func_def) in source_func_defs { - if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("", - line_num.to_owned(), - &source_func_def.name(), - &dest_func.name(), - &source_filename, - dest_filename, - "", - "green", - source_func_def.line_start(), - dest_func.line_start() - ); + if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + if match_import_func(&file_import, dest_func) { + // if found, create edge + let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // call func in that takes vec of lines and returns funcdefs + let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + for (line_num, source_func_def) in source_func_defs { + if source_func_def != dest_func.to_owned() { + graph_elems.add_edge("", + line_num.to_owned(), + &source_func_def.name(), + &dest_func.name(), + &source_filename, + dest_filename, + "", + "green", + source_func_def.line_start(), + dest_func.line_start() + ); + } } } - } + } } } } @@ -82,30 +87,32 @@ async fn incoming_edges(review: &Review, all_import_info: &FilesImportInfo, diff let file_imports = file_func_defs.all_import_paths(); for file_import in file_imports { // search for correct import - if match_import_condition(dest_filename, &file_import, dest_func) { - // find func call - git_checkout_commit(review, review.pr_head_commit()); - let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); - if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { - // call func in that takes vec of lines and returns funcdefs - let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); - for (line_num, source_func_def) in source_func_defs { - if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("", - line_num.to_owned(), - &source_func_def.name(), - &dest_func.name(), - &source_filename, - dest_filename, - "", - "red", - source_func_def.line_start(), - dest_func.line_start() - ); + if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + if match_import_func(&file_import, dest_func) { + // find func call + git_checkout_commit(review, review.pr_head_commit()); + let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // call func in that takes vec of lines and returns funcdefs + let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + for (line_num, source_func_def) in source_func_defs { + if source_func_def != dest_func.to_owned() { + graph_elems.add_edge("", + line_num.to_owned(), + &source_func_def.name(), + &dest_func.name(), + &source_filename, + dest_filename, + "", + "red", + source_func_def.line_start(), + dest_func.line_start() + ); + } } } - } + } } } } @@ -114,49 +121,58 @@ async fn incoming_edges(review: &Review, all_import_info: &FilesImportInfo, diff let file_imports = file_func_defs.all_import_paths(); for file_import in file_imports { // search for correct import - if match_import_condition(dest_filename, &file_import, dest_func) { - // if found, create edge - let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); - if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { - // call func in that takes vec of lines and returns funcdefs - let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); - for (line_num, source_func_def) in source_func_defs { - if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("red", - line_num.to_owned(), - &source_func_def.name(), - &dest_func.name(), - &source_filename, - dest_filename, - "", - "red", - source_func_def.line_start(), - dest_func.line_start() - ); + if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + if match_import_func(&file_import, dest_func) { + // if found, create edge + let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // call func in that takes vec of lines and returns funcdefs + let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + for (line_num, source_func_def) in source_func_defs { + if source_func_def != dest_func.to_owned() { + graph_elems.add_edge("red", + line_num.to_owned(), + &source_func_def.name(), + &dest_func.name(), + &source_filename, + dest_filename, + "", + "red", + source_func_def.line_start(), + dest_func.line_start() + ); + } } } - } - } + } + } } } } } } -fn match_import_condition(dest_filename: &str, import_obj: &ImportPath, dest_func_info: &FuncDefInfo) -> bool { - match_overlap( - &dest_filename, - &import_obj.import_path(), +fn match_import_func(import_obj: &ImportPath, dest_func_info: &FuncDefInfo) -> bool { + log::debug!("[match_import_condition] import_obj.imported = {}, dest_func_info = {:#?}", import_obj.imported(), dest_func_info); + // TODO FIXME - first condition doesn't make sense, it should always be true? - have to check for all calls of this function + match_overlap(&dest_func_info.name(), + &import_obj.imported(), 0.5) - && match_overlap(&dest_func_info.name(), + || match_overlap(&dest_func_info.parent(), &import_obj.imported(), 0.5) } -async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, review: &Review) { - for (source_filename, func_calls) in diff_graph.diff_func_calls() { +async fn outgoing_edges(base_filepaths: &Vec, head_filepaths: &Vec, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, review: &Review) { + git_checkout_commit(review, review.base_head_commit()); + for (source_filepath, func_calls) in diff_graph.diff_func_calls() { + let mut source_file_name = source_filepath.to_owned(); + if let Some(source_file) = absolute_to_relative_path(source_filepath, review){ + source_file_name = source_file.clone(); + } for source_func_call in func_calls.added_calls() { + log::debug!("[outgoing_edges] source func call import info = {:#?}", source_func_call.import_info()); let dest_filename = source_func_call.import_info().import_path(); let lines = source_func_call.call_info().iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); // send this file for getting func defs @@ -166,14 +182,15 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl if let Some(func_defs) = diff_file_funcdefs.functions_in_file(dest_filename) { let source_func_defs = func_defs.funcs_for_lines(&lines); for dest_func_def in func_defs.functions() { - if match_import_condition(dest_filename, source_func_call.import_info(), dest_func_def) { + if match_import_func( source_func_call.import_info(), dest_func_def) { // add edge + log::debug!("[outgoing_edges] Adding edge"); for (line_num, source_func_def) in &source_func_defs { graph_elems.add_edge("green", line_num.to_owned(), source_func_def.name(), dest_func_def.name(), - source_filename, + &source_file_name, dest_filename, "green", "", @@ -185,21 +202,26 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl } } // search in full graph - let mut dest_filepath = PathBuf::from_str(review.clone_dir()).expect("Unable to get path"); - dest_filepath.push(dest_filename); - if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath]).await { + let dest_filepath_opt = match_imported_filename_to_path(base_filepaths, dest_filename); + if dest_filepath_opt.is_none() { + log::error!("[outgoing_edges] Unable to find filename in all paths: {}", dest_filename); + continue; + } + let dest_filepath = dest_filepath_opt.expect("EMpty dest_filepath_opt"); + if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath.clone()]).await { // identify this particular func - if let Some(func_defs) = all_file_funcdefs.functions_in_file(dest_filename) { + let dest_filepath_key = dest_filepath.as_os_str().to_str().expect("Unable to deserialize dest_filepath"); + if let Some(func_defs) = all_file_funcdefs.functions_in_file(dest_filepath_key) { let source_func_defs = func_defs.funcs_for_lines(&lines); for dest_func_def in func_defs.functions() { - if match_import_condition(dest_filename, source_func_call.import_info(), dest_func_def) { + if match_import_func(source_func_call.import_info(), dest_func_def) { // add edge for (line_num, source_func_def) in &source_func_defs { graph_elems.add_edge("green", line_num.to_owned(), source_func_def.name(), dest_func_def.name(), - source_filename, + &source_file_name, dest_filename, "green", "", @@ -214,6 +236,7 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl } // do same for deleted_calls for source_func_call in func_calls.deleted_calls() { + log::debug!("[outgoing_edges] source func call import info = {:#?}", source_func_call.import_info()); let dest_filename = source_func_call.import_info().import_path(); let diff_file_funcdefs = diff_graph.all_file_func_defs(); let lines = source_func_call.call_info().iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); @@ -221,14 +244,14 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl if let Some(func_defs) = diff_file_funcdefs.functions_in_file(dest_filename) { let source_func_defs = func_defs.funcs_for_lines(&lines); for dest_func_def in func_defs.functions() { - if match_import_condition(dest_filename, source_func_call.import_info(), dest_func_def) { + if match_import_func(source_func_call.import_info(), dest_func_def) { // add edge for (line_num, source_func_def) in &source_func_defs { graph_elems.add_edge("red", line_num.to_owned(), source_func_def.name(), dest_func_def.name(), - source_filename, + &source_file_name, dest_filename, "red", "", @@ -240,26 +263,41 @@ async fn outgoing_edges(diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphEl } } // send this file for getting func defs - let dest_filepath = PathBuf::from_str(dest_filename).expect("Unable to get path"); - if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath]).await { + let dest_filepath_opt = match_imported_filename_to_path(base_filepaths, dest_filename); + if dest_filepath_opt.is_none() { + log::error!("[outgoing_edges] Unable to find filename in all paths: {}", dest_filename); + continue; + } + let dest_filepath = dest_filepath_opt.expect("EMpty dest_filepath_opt"); + if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath.clone()]).await { // identify this particular func - if let Some(func_defs) = all_file_funcdefs.functions_in_file(dest_filename) { - let source_func_defs = func_defs.funcs_for_lines(&lines); - for dest_func_def in func_defs.functions() { - if match_import_condition(dest_filename, source_func_call.import_info(), dest_func_def) { - // add edge - for (line_num, source_func_def) in &source_func_defs { - graph_elems.add_edge("red", - line_num.to_owned(), - source_func_def.name(), - dest_func_def.name(), - source_filename, - dest_filename, - "red", - "", - source_func_def.line_start(), - dest_func_def.line_start() - ); + if let Some(src_file_funcs) = diff_graph.all_file_func_defs().functions_in_file(source_filepath) { + let dest_filepath_key = dest_filepath.as_os_str().to_str().expect("Unable to deserialize dest_filepath"); + if let Some(dest_func_defs) = all_file_funcdefs.functions_in_file(dest_filepath_key) { + let mut rel_dest_filepath = dest_filepath_key.to_string(); + if let Some(dest_file) = absolute_to_relative_path(dest_filepath_key, review){ + rel_dest_filepath = dest_file.clone(); + } + // TODO FIXME - func_defs is for dest, we need it for src file, check other places as well to fix this + let source_func_defs = src_file_funcs.funcs_for_lines(&lines); + log::debug!("[outgoing_edges] lines = {:?}, source_func_defs = {:#?} dest_func_defs = {:#?}", &lines, &source_func_defs, &dest_func_defs); + for dest_func_def in dest_func_defs.functions() { + if match_import_func(source_func_call.import_info(), dest_func_def) { + // add edge + for (line_num, source_func_def) in &source_func_defs { + log::debug!("[outgoing_edges] Adding edge for deleted func in full_graph"); + graph_elems.add_edge("red", + line_num.to_owned(), + source_func_def.name(), + dest_func_def.name(), + &source_file_name, + &rel_dest_filepath, + "red", + "", + source_func_def.line_start(), + dest_func_def.line_start() + ); + } } } } diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index 0d418e43..f30bd761 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -1,7 +1,7 @@ use crate::{graph::{elements::MermaidGraphElements, graph_edges::graph_edges, graph_info::generate_diff_graph}, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; -use super::{file_imports::get_import_lines, utils::all_code_files}; +use super::{file_imports::{get_import_lines, ChunkImportInfo, FileImportInfo, FileImportLines, FilesImportInfo, ImportPath}, function_call::FunctionCallChunk, function_line_range::{AllFileFunctions, FuncDefInfo, FunctionFileMap}, graph_info::{DiffFuncCall, DiffFuncDefs, DiffGraph, FuncCall}, utils::all_code_files}; pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Review) -> Option { @@ -21,7 +21,8 @@ pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Rev 'nodeSpacing': 100, \ 'rankSpacing': 100 \ }} \ - }} }}%%\n{}", + }} }}%%\n \ + \tflowchart LR\n{}", &flowchart_content ); return Some(flowchart_str); @@ -30,21 +31,21 @@ pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Rev async fn generate_flowchart_elements(diff_files: &Vec, review: &Review) -> Option { // generate full graph for base commit id git_checkout_commit(review, review.base_head_commit()); - let repo_code_files_opt = all_code_files(review.clone_dir()); - if repo_code_files_opt.is_none() { + let base_filepaths_opt = all_code_files(review.clone_dir()); + if base_filepaths_opt.is_none() { log::error!( "[generate_flowchart_elements] Unable to get file paths: {}", review.clone_dir()); return None; } - let repo_code_files = repo_code_files_opt.expect("Empty repo_code_files_opt"); - let base_commit_import_info_opt = get_import_lines(&repo_code_files).await; + let base_filepaths = base_filepaths_opt.expect("Empty base_filepaths_opt"); + let base_commit_import_info_opt = get_import_lines(&base_filepaths).await; log::debug!("[generate_flowchart_elements] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all_file_import_info_opt = {:#?}", &base_commit_import_info_opt); if base_commit_import_info_opt.is_none() { - log::error!("[generate_flowchart_elements] Unable to get import info for source files: {:#?}", &repo_code_files); + log::error!("[generate_flowchart_elements] Unable to get import info for source files: {:#?}", &base_filepaths); return None; } let base_commit_import_info = base_commit_import_info_opt.expect("Empty import_lines_opt"); - git_checkout_commit(review, review.pr_head_commit()); + // let base_commit_import_info = get_test_import_info(); let diff_graph_opt = generate_diff_graph(diff_files, review, &base_commit_import_info).await; log::debug!("[generate_flowchart_elements] diff_graph_opt = {:#?}", &diff_graph_opt); if diff_graph_opt.is_none() { @@ -54,10 +55,18 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review return None; } let diff_graph = diff_graph_opt.expect("Empty diff_graph_opt"); + // let diff_graph = get_test_diff_graph(); // let diff_info = generate_diff_info(&full_graph, &diff_graph); let mut graph_elems = MermaidGraphElements::new(); - git_checkout_commit(review, review.base_head_commit()); - graph_edges(review, &base_commit_import_info, &diff_graph, &mut graph_elems).await; + git_checkout_commit(review, review.pr_head_commit()); + let head_filepaths_opt = all_code_files(review.clone_dir()); + if head_filepaths_opt.is_none() { + log::error!( + "[generate_flowchart_elements] Unable to get file paths: {}", review.clone_dir()); + return None; + } + let head_filepaths = head_filepaths_opt.expect("Empty head_filepaths_opt"); + graph_edges(&base_filepaths, &head_filepaths, review, &base_commit_import_info, &diff_graph, &mut graph_elems).await; let elems_str = graph_elems.render_elements(review); return Some(elems_str); } \ No newline at end of file diff --git a/vibi-dpu/src/graph/utils.rs b/vibi-dpu/src/graph/utils.rs index aec31d51..634eddc0 100644 --- a/vibi-dpu/src/graph/utils.rs +++ b/vibi-dpu/src/graph/utils.rs @@ -70,7 +70,7 @@ pub async fn call_llm_api(prompt: String) -> Option { } let parsed_chunk = parsed_chunk_res.expect("Uncaught error in parsed_chunk_res"); if let Some(parsed_response) = parsed_chunk.get("response").and_then(|v| v.as_str()){ - final_response.push_str(parsed_response); + final_response.push_str(parsed_response); } if let Some(done_field) = parsed_chunk.get("done").and_then(|v| v.as_bool()) { if done_field { @@ -78,6 +78,10 @@ pub async fn call_llm_api(prompt: String) -> Option { } } } + let final_response_trimmed = final_response.trim(); + if final_response_trimmed.starts_with("{") && !final_response_trimmed.ends_with("}") { + final_response.push_str("}"); + } log::debug!("[call_llm_api] final_response = {:?}", &final_response); Some(final_response) } @@ -115,9 +119,9 @@ pub fn all_code_files(dir: &str) -> Option> { let mut code_files = Vec::::new(); for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) { let path = entry.path().to_owned(); - log::debug!("[generate_function_map] path = {:?}", path); + log::debug!("[all_code_files] path = {:?}", path); let ext = path.extension().and_then(|ext| ext.to_str()); - log::debug!("[generate_function_map] extension = {:?}", &ext); + log::debug!("[all_code_files] extension = {:?}", &ext); if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { match path.canonicalize() { Ok(abs_path) => code_files.push(abs_path), @@ -131,6 +135,18 @@ pub fn all_code_files(dir: &str) -> Option> { return Some(code_files); } +pub fn match_imported_filename_to_path(paths: &Vec, filename: &str) -> Option { + let relative_path = Path::new(filename); + // Find the first path that matches the filename or relative path + for path in paths { + if path.ends_with(relative_path) { + return Some(path.clone()); // Return the first matching path + } + } + // Return an empty PathBuf or handle the case where no match is found + None +} + pub fn source_diff_files(diff_files: &Vec) -> Option> { let mut code_files = Vec::::new(); for stat_item in diff_files { @@ -157,8 +173,23 @@ pub fn numbered_content(file_contents: String) -> Vec { pub fn match_overlap(str1: &str, str2: &str, similarity_threshold: f64) -> bool { let similarity = jaro_winkler(str1, str2); + log::debug!("[match_overlap] str1 = {}, str2 = {}, similarity = {}, similarity_threshold = {}", str1, str2, similarity, similarity_threshold); if similarity >= similarity_threshold { return true; } return false; +} + +pub fn absolute_to_relative_path(abs_path: &str, review: &Review) -> Option { + let base_path = review.clone_dir(); + let full_path = PathBuf::from(abs_path); + let rel_path_res = full_path.strip_prefix(base_path); + log::debug!("[absolute_to_relative_path] rel_path = {:#?}", &rel_path_res); + log::debug!("[absolute_to_relative_path] full_path = {:?}, base_path = {:?}", &full_path, base_path); + if let Err(e) = rel_path_res { + log::error!("[absolute_to_relative_path] Error in removing prefix: {:?}", e); + return None; + } + let rel_path = rel_path_res.expect("Uncaught error in rel_path_res"); + return Some(rel_path.to_str().expect("Unable to deserialze rel_path").to_string()); } \ No newline at end of file From 06d34e46a721ee70419762aed8098651d115d490 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Mon, 23 Sep 2024 17:43:53 +0530 Subject: [PATCH 31/43] stored diff func calls in diff graph struct --- vibi-dpu/src/graph/elements.rs | 12 +++- vibi-dpu/src/graph/file_imports.rs | 6 +- vibi-dpu/src/graph/function_call.rs | 8 ++- vibi-dpu/src/graph/graph_edges.rs | 69 +++++++++++++++++----- vibi-dpu/src/graph/graph_info.rs | 88 +++++++++++++++++++++++------ 5 files changed, 147 insertions(+), 36 deletions(-) diff --git a/vibi-dpu/src/graph/elements.rs b/vibi-dpu/src/graph/elements.rs index 4d35d9dc..9327904b 100644 --- a/vibi-dpu/src/graph/elements.rs +++ b/vibi-dpu/src/graph/elements.rs @@ -75,10 +75,14 @@ impl MermaidSubgraph { for (_, node) in self.nodes() { all_nodes.push(node.render_node(review, subgraph_map)); } + let mut subg_name: &str = self.name(); + if subg_name == "" { + subg_name = "unidentified_file"; + } let subgraph_str = format!( "\tsubgraph {} [\"{}\"]\n{}\n\tend\n{}\n", self.mermaid_id, - self.name, + subg_name, all_nodes.join("\n"), self.render_subgraph_style() ); @@ -179,7 +183,11 @@ impl MermaidNode { let url_str = format!("\tclick {} href \"{}\" _blank", self.mermaid_id(), self.get_node_str(review, subgraph_map)); let class_str = self.get_style_class(); - let node_str = format!("\t{}[\"{}\"]", &self.mermaid_id, &self.function_name); + let mut func_name: &str = self.function_name(); + if func_name == "" { + func_name = "unidentified_func"; + } + let node_str = format!("\t{}[\"{}\"]", &self.mermaid_id, func_name); return format!("{}\n{}\n{}", &node_str, &class_str, &url_str); } diff --git a/vibi-dpu/src/graph/file_imports.rs b/vibi-dpu/src/graph/file_imports.rs index eb5d1c0a..2edda1f4 100644 --- a/vibi-dpu/src/graph/file_imports.rs +++ b/vibi-dpu/src/graph/file_imports.rs @@ -36,7 +36,7 @@ struct LlmImportPathRequest { } #[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct ImportPath { +pub struct ImportPath { import_line: usize, import_path: String, imported: String @@ -49,6 +49,10 @@ impl PartialEq for ImportPath { } impl ImportPath { + + pub fn new(import_line: usize, import_path: String, imported: String) -> Self { + Self { import_line, import_path, imported } + } pub fn import_path(&self) -> &String { &self.import_path } diff --git a/vibi-dpu/src/graph/function_call.rs b/vibi-dpu/src/graph/function_call.rs index bf525c8f..dd433d16 100644 --- a/vibi-dpu/src/graph/function_call.rs +++ b/vibi-dpu/src/graph/function_call.rs @@ -6,12 +6,14 @@ use super::{file_imports::get_import_lines, gitops::HunkDiffMap, utils::{call_ll #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionCallChunk { - function_calls: Vec + function_calls: Vec, + #[serde(skip_deserializing)] + function_name: Option } impl FunctionCallChunk { - pub fn new(function_calls: Vec) -> Self { - Self { function_calls } + pub fn new(function_calls: Vec, function_name: String) -> Self { + Self { function_calls, function_name: Some(function_name) } } pub fn function_calls(&self) -> &Vec { &self.function_calls diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index f44cb345..239e5d87 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -10,10 +10,18 @@ pub async fn graph_edges(base_filepaths: &Vec, head_filepaths: &Vec, review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { for (dest_filename, func_defs) in diff_graph.diff_func_defs() { + let mut dest_file_rel = dest_filename.to_string(); + if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { + dest_file_rel = dest_file_relative_path; + } for dest_func in func_defs.added_func_defs() { git_checkout_commit(review, review.pr_head_commit()); // search in diff graph for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { + let mut source_rel_path = source_filename.to_string(); + if let Some(src_relative_filepath) = absolute_to_relative_path(&source_rel_path, review) { + source_rel_path = src_relative_filepath; + } let file_imports = file_func_defs.all_import_paths(); for file_import in file_imports { // search for correct import @@ -32,8 +40,8 @@ async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_impo line_num.to_owned(), &source_func_def.name(), &dest_func.name(), - &source_filename, - dest_filename, + &source_rel_path, + &dest_file_rel, "", "green", source_func_def.line_start(), @@ -49,6 +57,10 @@ async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_impo git_checkout_commit(review, review.base_head_commit()); // search in full graph for (source_filename, file_func_defs) in all_import_info.file_import_map() { + let mut source_file_rel = source_filename.to_string(); + if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + source_file_rel = src_relative_filepath; + } let file_imports = file_func_defs.all_import_paths(); for file_import in file_imports { // search for correct import @@ -59,15 +71,20 @@ async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_impo if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { // call func in that takes vec of lines and returns funcdefs let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + if source_func_defs_opt.is_none() { + log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + continue; + } + let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); for (line_num, source_func_def) in source_func_defs { if source_func_def != dest_func.to_owned() { graph_elems.add_edge("", line_num.to_owned(), &source_func_def.name(), &dest_func.name(), - &source_filename, - dest_filename, + &source_file_rel, + &dest_file_rel, "", "green", source_func_def.line_start(), @@ -84,6 +101,10 @@ async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_impo for dest_func in func_defs.deleted_func_defs() { // search in diff graph for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { + let mut source_file_rel = source_filename.to_string(); + if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + source_file_rel = src_relative_filepath; + } let file_imports = file_func_defs.all_import_paths(); for file_import in file_imports { // search for correct import @@ -95,15 +116,20 @@ async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_impo if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { // call func in that takes vec of lines and returns funcdefs let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + if source_func_defs_opt.is_none() { + log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + continue; + } + let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); for (line_num, source_func_def) in source_func_defs { if source_func_def != dest_func.to_owned() { graph_elems.add_edge("", line_num.to_owned(), &source_func_def.name(), &dest_func.name(), - &source_filename, - dest_filename, + &source_file_rel, + &dest_file_rel, "", "red", source_func_def.line_start(), @@ -118,6 +144,10 @@ async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_impo } // search in full graph for (source_filename, file_func_defs) in all_import_info.file_import_map() { + let mut source_file_rel = source_filename.to_string(); + if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + source_file_rel = src_relative_filepath; + } let file_imports = file_func_defs.all_import_paths(); for file_import in file_imports { // search for correct import @@ -128,15 +158,20 @@ async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_impo if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { // call func in that takes vec of lines and returns funcdefs let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + if source_func_defs_opt.is_none() { + log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + continue; + } + let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); for (line_num, source_func_def) in source_func_defs { if source_func_def != dest_func.to_owned() { graph_elems.add_edge("red", line_num.to_owned(), &source_func_def.name(), &dest_func.name(), - &source_filename, - dest_filename, + &source_file_rel, + &dest_file_rel, "", "red", source_func_def.line_start(), @@ -158,10 +193,10 @@ fn match_import_func(import_obj: &ImportPath, dest_func_info: &FuncDefInfo) -> b // TODO FIXME - first condition doesn't make sense, it should always be true? - have to check for all calls of this function match_overlap(&dest_func_info.name(), &import_obj.imported(), - 0.5) + 0.6) || match_overlap(&dest_func_info.parent(), &import_obj.imported(), - 0.5) + 0.6) } async fn outgoing_edges(base_filepaths: &Vec, head_filepaths: &Vec, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, review: &Review) { @@ -173,6 +208,7 @@ async fn outgoing_edges(base_filepaths: &Vec, head_filepaths: &Vec, head_filepaths: &Vec, head_filepaths: &Vec, head_filepaths: &Vec &String { + self.import_info.imported() + } } #[derive(Debug, Default, Clone)] @@ -84,8 +91,9 @@ impl DiffFuncCall { pub struct DiffGraph { diff_files_func_defs: AllFileFunctions, diff_files_imports: FilesImportInfo, + diff_files_func_calls: HashMap>, diff_func_defs: HashMap, - diff_func_calls: HashMap + diff_func_calls: HashMap, } impl DiffGraph { @@ -112,6 +120,15 @@ impl DiffGraph { pub fn diff_func_calls(&self) -> &HashMap { &self.diff_func_calls } + + pub fn func_calls_for_func(&self, function_name: &str, filename: &str) -> Option<&FuncCall> { + if let Some(func_call_map) = self.diff_files_func_calls.get(filename) { + if let Some(func_call) = func_call_map.get(function_name) { + return Some(func_call) + } + } + return None; + } } pub async fn generate_diff_graph(diff_files: &Vec, review: &Review, base_commit_import_info: &FilesImportInfo) -> Option { @@ -151,7 +168,9 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, base_commit_import_info: } let diff_files_func_defs = diff_func_defs_opt.expect("Empty all_file_func_defs_opt)"); let diff_files_imports = diff_imports_opt.expect("Empty all_file_imports_opt"); + let diff_files_func_calls = diff_file_func_calls(&all_diff_files, &diff_files_imports, &diff_files_func_defs).await; let mut diff_graph = DiffGraph { + diff_files_func_calls, diff_files_func_defs, diff_files_imports, diff_func_defs: HashMap::new(), @@ -191,18 +210,22 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, base_commit_import_info: // might need to reorder for loops to make sure repeated calcs are avoided if let Some(imports_info) = diff_graph.all_file_imports().file_import_info(filename) { for import_info in imports_info.all_import_paths() { - // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls - if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { - // func_calls is basically all func calls of a function in the latest commit of the file - if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { - let func_call = FuncCall{ import_info, call_info: func_calls }; - for hunk_diff in file_line_map.added_hunks() { - if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { - diff_func_calls_add.add_added_calls(hunk_func_call); - } - } - } + if let Some(func_call) = diff_graph.func_calls_for_func(import_info.imported(), filename) { + diff_func_calls_add.add_added_calls(func_call.to_owned()); } + // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls + // TODO FIXME - need function call calc for all diff files, need to search for funcdefs as well as imports + // if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { + // // func_calls is basically all func calls of a function in the latest commit of the file + // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + // let func_call = FuncCall{ import_info, call_info: func_calls }; + // for hunk_diff in file_line_map.added_hunks() { + // if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { + // diff_func_calls_add.add_added_calls(hunk_func_call); + // } + // } + // } + // } } } // Use full graph's import info @@ -237,4 +260,37 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, base_commit_import_info: diff_graph.add_diff_func_calls(filename.to_owned(), diff_func_call.to_owned()); } return Some(diff_graph); -} \ No newline at end of file +} + +async fn diff_file_func_calls(all_diff_files: &Vec, diff_imports: &FilesImportInfo, diff_file_funcs: &AllFileFunctions) -> HashMap>{ + let mut func_call_file_map = HashMap::new(); + for filepathbuf in all_diff_files { + let filepath = filepathbuf.to_str().expect("Unable to deserialize pathbuf"); + let mut func_call_map = HashMap::::new(); + // search using imports + if let Some(imports_info) = diff_imports.file_import_info(filepath) { + for import_info in imports_info.all_import_paths() { + if let Some(func_calls) = function_calls_in_file( + &filepathbuf, import_info.imported()).await { + let func_call = FuncCall{ import_info, call_info: func_calls }; + func_call_map.insert( + func_call.function_name().to_string(), func_call); + } + } + } + // search in func defs + if let Some(func_def_map) = diff_file_funcs.functions_in_file(filepath) { + for func_def in func_def_map.functions() { + if let Some(func_calls) = function_calls_in_file( + &filepathbuf, func_def.name()).await { + let fake_import = ImportPath::new( 0, filepath.to_string(), func_def.name().to_string()); + let func_call = FuncCall{import_info: fake_import, call_info: func_calls}; + func_call_map.insert( + func_call.function_name().to_string(), func_call); + } + } + } + func_call_file_map.insert(filepath.to_string(), func_call_map); + } + return func_call_file_map; +} \ No newline at end of file From 40f427e5c9a25f24ff3990d09ebc2e3605dc119e Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Tue, 24 Sep 2024 12:18:04 +0530 Subject: [PATCH 32/43] fix empty name in nodes --- vibi-dpu/src/graph/function_line_range.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs index d5e2baf9..1ea40039 100644 --- a/vibi-dpu/src/graph/function_line_range.rs +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -190,6 +190,10 @@ pub async fn generate_function_map(file_paths: &Vec) -> Option Date: Thu, 3 Oct 2024 06:12:02 +0530 Subject: [PATCH 33/43] rewrite import and function call import --- vibi-dpu/src/db/graph_info.rs | 45 -- vibi-dpu/src/graph/file_imports.rs | 631 +++++++++++++++---------- vibi-dpu/src/graph/function_call.rs | 149 +++++- vibi-dpu/src/graph/graph_edges.rs | 288 +++++------ vibi-dpu/src/graph/graph_info.rs | 177 +++---- vibi-dpu/src/graph/mermaid_elements.rs | 11 +- 6 files changed, 780 insertions(+), 521 deletions(-) delete mode 100644 vibi-dpu/src/db/graph_info.rs diff --git a/vibi-dpu/src/db/graph_info.rs b/vibi-dpu/src/db/graph_info.rs deleted file mode 100644 index 1a700f7e..00000000 --- a/vibi-dpu/src/db/graph_info.rs +++ /dev/null @@ -1,45 +0,0 @@ -use sled::IVec; - -use crate::{db::config::get_db, graph::file_imports::FilesImportInfo}; -pub fn save_import_info_to_db(review_key: &str, commit_id: &str, all_imports: &FilesImportInfo) { - let db = get_db(); - let graph_info_key = format!("graph_info/{}/{}", review_key, commit_id); - // Serialize repo struct to JSON - let json = serde_json::to_vec(all_imports).expect("Failed to serialize review"); - // Insert JSON into sled DB - let insert_res = db.insert(IVec::from(graph_info_key.as_bytes()), json); - if insert_res.is_err() { - let e = insert_res.expect_err("No error in insert_res"); - log::error!("[save_graph_info_to_db] Failed to upsert graph info into sled DB: {e}"); - return; - } - log::debug!("[save_graph_info_to_db] Graph Info succesfully upserted: {:#?}", all_imports); -} - -pub fn get_import_info_from_db(review_key: &str, commit_id: &str) -> Option { - let db = get_db(); - let graph_info_key = format!("graph_info/{}/{}", review_key, commit_id); - let graph_info_res = db.get(IVec::from(graph_info_key.as_bytes())); - if let Err(e) = graph_info_res { - log::error!("[get_graph_info_from_db] GraphInfo key not found in db - {}, error: {:?}", - &graph_info_key, e); - return None; - } - let ivec_opt = graph_info_res.expect("Uncaught error in graph_info_res"); - log::debug!("[get_graph_info_from_db] ivec_opt: {:?}", ivec_opt); - if ivec_opt.is_none() { - log::error!("[get_graph_info_from_db] No graph info found for {}/{}", review_key, commit_id); - return None; - } - let ivec = ivec_opt.expect("Empty ivec_opt"); - let graph_info_res = serde_json::from_slice(&ivec); - if let Err(e) = graph_info_res { - log::error!( - "[get_graph_info_from_db] Failed to deserialize review from json: {:?}", - e - ); - return None; - } - let graph_info: FilesImportInfo = graph_info_res.expect("Uncaught error in graph_info_res"); - return Some(graph_info); -} \ No newline at end of file diff --git a/vibi-dpu/src/graph/file_imports.rs b/vibi-dpu/src/graph/file_imports.rs index 2edda1f4..273639dc 100644 --- a/vibi-dpu/src/graph/file_imports.rs +++ b/vibi-dpu/src/graph/file_imports.rs @@ -1,295 +1,450 @@ use std::{collections::HashMap, path::PathBuf}; use serde::{Deserialize, Serialize}; +use serde_json::json; use crate::{graph::utils::numbered_content, utils::review::Review}; use super::utils::{all_code_files, call_llm_api, read_file}; -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmImportLineInput { - language: String, - file_path: String, - chunk: String -} +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// struct LlmImportLineInput { +// language: String, +// file_path: String, +// chunk: String +// } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmImportLineRequest { - input: LlmImportLineInput -} +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// struct LlmImportLineRequest { +// input: LlmImportLineInput +// } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct FileImportLines { - lines: Vec -} +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct FileImportLines { +// lines: Vec +// } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmImportPathInput { - language: String, - file_path: String, - import_lines: String -} +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// struct LlmImportPathInput { +// language: String, +// file_path: String, +// import_lines: String +// } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmImportPathRequest { - input: LlmImportPathInput -} +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// struct LlmImportPathRequest { +// input: LlmImportPathInput +// } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct ImportPath { - import_line: usize, - import_path: String, - imported: String -} +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct ImportPath { +// import_line: usize, +// import_path: String, +// imported: String +// } -impl PartialEq for ImportPath { - fn eq(&self, other: &Self) -> bool { - self.import_line == other.import_line && self.import_path == other.import_path && self.imported == other.imported - } -} +// impl PartialEq for ImportPath { +// fn eq(&self, other: &Self) -> bool { +// self.import_line == other.import_line && self.import_path == other.import_path && self.imported == other.imported +// } +// } + +// impl ImportPath { -impl ImportPath { +// pub fn new(import_line: usize, import_path: String, imported: String) -> Self { +// Self { import_line, import_path, imported } +// } +// pub fn import_path(&self) -> &String { +// &self.import_path +// } + +// pub fn imported(&self) -> &String { +// &self.imported +// } +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct ImportPaths { +// imports: Vec, +// } + +// impl ImportPaths { +// pub fn imports(&self) -> &Vec { +// &self.imports +// } +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct ChunkImportInfo { +// import_lines: FileImportLines, +// import_paths: Vec +// } + +// impl ChunkImportInfo { +// pub fn import_paths(&self) -> &Vec { +// &self.import_paths +// } +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct FileImportInfo { +// import_chunk_info: Vec, +// filepath: String +// } + +// impl FileImportInfo { +// pub fn all_import_paths(&self) -> Vec { +// let all_paths: Vec = self.import_chunk_info +// .iter() +// .flat_map(|chunk| chunk.import_paths()) +// .cloned() +// .collect(); +// return all_paths; +// } + +// pub fn filepath(&self) -> &String { +// &self.filepath +// } +// } + +// #[derive(Debug, Serialize, Default, Deserialize, Clone)] +// pub struct FilesImportInfo { +// file_import_map: HashMap +// } + +// impl FilesImportInfo { +// pub fn files(&self) -> Vec<&String> { +// self.file_import_map.keys().collect() +// } - pub fn new(import_line: usize, import_path: String, imported: String) -> Self { - Self { import_line, import_path, imported } - } - pub fn import_path(&self) -> &String { - &self.import_path - } +// pub fn is_import_in_file(&self, filename: &str, import_path: &ImportPath) -> bool { +// self.file_import_map[filename].all_import_paths().contains(import_path) +// } - pub fn imported(&self) -> &String { - &self.imported - } -} +// pub fn file_import_info(&self, filename: &str) -> Option<&FileImportInfo> { +// self.file_import_map.get(filename) +// } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct ImportPaths { - imports: Vec, -} +// pub fn file_import_map(&self) -> &HashMap { +// &self.file_import_map +// } +// } -impl ImportPaths { - pub fn imports(&self) -> &Vec { - &self.imports - } +// pub async fn get_import_lines(file_paths: &Vec) -> Option { +// let mut all_import_info = HashMap::::new(); +// let system_prompt_opt = read_file("/app/prompts/prompt_import_lines"); +// if system_prompt_opt.is_none() { +// log::error!("[get_import_lines] Unable to read prompt_import_lines"); +// return None; +// } +// let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); +// let system_prompt_path_opt = read_file("/app/prompts/prompt_import_path"); +// if system_prompt_path_opt.is_none() { +// log::error!("[get_import_lines] Unable to read prompt_import_path"); +// return None; +// } +// let system_prompt_path = system_prompt_path_opt.expect("Empty system_prompt"); +// for path in file_paths { +// log::debug!("[get_import_lines] path = {:?}", path); +// let file_contents_res = std::fs::read_to_string(path.clone()); +// if file_contents_res.is_err() { +// let e = file_contents_res.expect_err("Empty error in file_content_res"); +// log::error!("[get_import_lines] Unable to read file: {:?}, error: {:?}", path, e); +// continue; +// } +// let file_contents = file_contents_res.expect("Uncaught error in file_content_res"); +// let numbered_content = numbered_content(file_contents); +// let chunks = numbered_content.chunks(20); +// let path_str = path.to_str().expect("Empty path"); +// let mut chunks_import_vec = Vec::::new(); +// for chunk in chunks { +// let chunk_str = chunk.join("\n"); +// let import_lines_opt = get_import_lines_chunk( +// &system_prompt_lines, &chunk_str, +// path_str).await; +// if import_lines_opt.is_none() { +// log::error!("[get_import_lines] Skipping chunk, unable to get import lines"); +// continue; +// } +// let import_lines_chunk = import_lines_opt.expect("Empty func_boundary_opt"); +// if let Some(import_paths) = get_import_path_file(&numbered_content, +// import_lines_chunk.clone(), &system_prompt_path, path_str).await { +// let chunk_import_info = ChunkImportInfo { import_lines: import_lines_chunk, import_paths }; +// chunks_import_vec.push(chunk_import_info); +// } +// } +// let import_info = FileImportInfo { +// import_chunk_info: chunks_import_vec, filepath: path_str.to_string() }; +// all_import_info.insert(path_str.to_string(), import_info); +// } +// if all_import_info.is_empty() { +// return None; +// } +// return Some(FilesImportInfo { file_import_map: all_import_info }); +// } + +// async fn get_import_lines_chunk(system_prompt_lines: &str, chunk_str: &str, file_path: &str) -> Option { +// let llm_req = LlmImportLineRequest { input: +// LlmImportLineInput { +// language: "rust".to_string(), +// file_path: file_path.to_string(), +// chunk: chunk_str.to_string() } }; +// let llm_req_res = serde_json::to_string(&llm_req); +// if llm_req_res.is_err() { +// log::error!("[get_import_lines_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); +// return None; +// } +// let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); +// let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", +// system_prompt_lines, llm_req_prompt); +// match call_llm_api(prompt).await { +// None => { +// log::error!("[get_import_lines_chunk] Failed to call LLM API"); +// return None; +// } +// Some(llm_response) => { +// let import_res = serde_json::from_str(&llm_response); +// if import_res.is_err() { +// log::error!( +// "[get_import_lines_chunk] funcdefs error: {}", +// import_res.expect_err("Empty error in funcdefs_res")); +// return None; +// } +// let import_lines_file: FileImportLines = import_res.expect("Uncaught error in funcdefs_res"); +// return Some(import_lines_file); +// } +// } +// } + +#[derive(Serialize, Deserialize, Debug)] +struct InputSchema { + function_name: String, + code_chunk: String, + language: String, + file_path: String, } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct ChunkImportInfo { - import_lines: FileImportLines, - import_paths: Vec +// Output schema structure for matching import +#[derive(Serialize, Deserialize, Debug)] +struct MatchingImport { + line_number: u32, + import_statement: String, + possible_file_path: String, } -impl ChunkImportInfo { - pub fn import_paths(&self) -> &Vec { - &self.import_paths - } +// Full output schema structure +#[derive(Serialize, Deserialize, Debug)] +struct ImportPathOutput { + matching_import: MatchingImport, + notes: Option, } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct FileImportInfo { - import_chunk_info: Vec, - filepath: String +// Instruction structure +#[derive(Serialize, Deserialize, Debug)] +struct Instructions { + input_schema: InputSchemaDescription, + output_schema: OutputSchemaDescription, + task_description: String, } -impl FileImportInfo { - pub fn all_import_paths(&self) -> Vec { - let all_paths: Vec = self.import_chunk_info - .iter() - .flat_map(|chunk| chunk.import_paths()) - .cloned() - .collect(); - return all_paths; - } +// Description of input schema +#[derive(Serialize, Deserialize, Debug)] +struct InputSchemaDescription { + function_name: String, + code_chunk: String, + language: String, + file_path: String, +} - pub fn filepath(&self) -> &String { - &self.filepath - } +// Description of output schema +#[derive(Serialize, Deserialize, Debug)] +struct OutputSchemaDescription { + matching_import: MatchingImportDescription, + notes: String, } -#[derive(Debug, Serialize, Default, Deserialize, Clone)] -pub struct FilesImportInfo { - file_import_map: HashMap +// Description for matching import schema +#[derive(Serialize, Deserialize, Debug)] +struct MatchingImportDescription { + line_number: String, + import_statement: String, + possible_file_path: String, } -impl FilesImportInfo { - pub fn files(&self) -> Vec<&String> { - self.file_import_map.keys().collect() - } - - pub fn is_import_in_file(&self, filename: &str, import_path: &ImportPath) -> bool { - self.file_import_map[filename].all_import_paths().contains(import_path) - } +// Complete structure for JSON input and output +#[derive(Serialize, Deserialize, Debug)] +struct ImportPathJsonStructure { + instructions: Instructions, + sample_input: InputSchema, + expected_output: ImportPathOutput, + input: Option, +} - pub fn file_import_info(&self, filename: &str) -> Option<&FileImportInfo> { - self.file_import_map.get(filename) +impl ImportPathJsonStructure { + fn set_input(&mut self, input_schema: InputSchema) { + self.input = Some(input_schema); } +} - pub fn file_import_map(&self) -> &HashMap { - &self.file_import_map - } +pub struct ImportIdentifier { + prompt_struct: ImportPathJsonStructure } -pub async fn get_import_lines(file_paths: &Vec) -> Option { - let mut all_import_info = HashMap::::new(); - let system_prompt_opt = read_file("/app/prompts/prompt_import_lines"); - if system_prompt_opt.is_none() { - log::error!("[get_import_lines] Unable to read prompt_import_lines"); - return None; - } - let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); - let system_prompt_path_opt = read_file("/app/prompts/prompt_import_path"); - if system_prompt_path_opt.is_none() { - log::error!("[get_import_lines] Unable to read prompt_import_path"); - return None; - } - let system_prompt_path = system_prompt_path_opt.expect("Empty system_prompt"); - for path in file_paths { - log::debug!("[get_import_lines] path = {:?}", path); - let file_contents_res = std::fs::read_to_string(path.clone()); - if file_contents_res.is_err() { - let e = file_contents_res.expect_err("Empty error in file_content_res"); - log::error!("[get_import_lines] Unable to read file: {:?}, error: {:?}", path, e); - continue; +impl ImportIdentifier { + pub fn new() -> Option { + let system_prompt_opt = read_file("/app/prompts/prompt_import_file_path"); + if system_prompt_opt.is_none() { + log::debug!("[ImportIdentifier/new] Unable to read prompt_import_file"); + return None; } - let file_contents = file_contents_res.expect("Uncaught error in file_content_res"); - let numbered_content = numbered_content(file_contents); - let chunks = numbered_content.chunks(20); - let path_str = path.to_str().expect("Empty path"); - let mut chunks_import_vec = Vec::::new(); - for chunk in chunks { - let chunk_str = chunk.join("\n"); - let import_lines_opt = get_import_lines_chunk( - &system_prompt_lines, &chunk_str, - path_str).await; - if import_lines_opt.is_none() { - log::error!("[get_import_lines] Skipping chunk, unable to get import lines"); - continue; - } - let import_lines_chunk = import_lines_opt.expect("Empty func_boundary_opt"); - if let Some(import_paths) = get_import_path_file(&numbered_content, - import_lines_chunk.clone(), &system_prompt_path, path_str).await { - let chunk_import_info = ChunkImportInfo { import_lines: import_lines_chunk, import_paths }; - chunks_import_vec.push(chunk_import_info); - } + let system_prompt_str = system_prompt_opt.expect("Empty system_prompt_opt"); + let sys_prompt_struct_res = serde_json::from_str(&system_prompt_str); + if sys_prompt_struct_res.is_err() { + log::debug!("[ImportIdentifier/new] Unable to deserialize sys prompt: {:?}", + sys_prompt_struct_res.expect_err("Empty error")); + return None; } - let import_info = FileImportInfo { - import_chunk_info: chunks_import_vec, filepath: path_str.to_string() }; - all_import_info.insert(path_str.to_string(), import_info); + let sys_prompt_struct: ImportPathJsonStructure = sys_prompt_struct_res.expect("Uncaught error in sys_prompt_struct_res"); + return Some(Self { + prompt_struct: sys_prompt_struct + }); } - if all_import_info.is_empty() { - return None; - } - return Some(FilesImportInfo { file_import_map: all_import_info }); -} - -async fn get_import_lines_chunk(system_prompt_lines: &str, chunk_str: &str, file_path: &str) -> Option { - let llm_req = LlmImportLineRequest { input: - LlmImportLineInput { - language: "rust".to_string(), + pub async fn get_import_path(&mut self, func_name: &str, lang: &str, file_path: &str, chunk: &str) -> Option{ + // create prompt + let input_schema = InputSchema { + function_name: func_name.to_string(), + code_chunk: chunk.to_string(), + language: lang.to_string(), file_path: file_path.to_string(), - chunk: chunk_str.to_string() } }; - let llm_req_res = serde_json::to_string(&llm_req); - if llm_req_res.is_err() { - log::error!("[get_import_lines_chunk] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); - return None; - } - let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); - let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", - system_prompt_lines, llm_req_prompt); - match call_llm_api(prompt).await { - None => { - log::error!("[get_import_lines_chunk] Failed to call LLM API"); + }; + self.prompt_struct.set_input(input_schema); + // call api + let import_struct_str_res = serde_json::to_string(&self.prompt_struct); + if import_struct_str_res.is_err() { + log::debug!( + "[ImportIdentifier/get_import_path] Unable to deserialize prompt struct: {:?}", + import_struct_str_res.expect_err("Empty error in import_struct_str_res")); return None; } - Some(llm_response) => { - let import_res = serde_json::from_str(&llm_response); - if import_res.is_err() { - log::error!( - "[get_import_lines_chunk] funcdefs error: {}", - import_res.expect_err("Empty error in funcdefs_res")); - return None; - } - let import_lines_file: FileImportLines = import_res.expect("Uncaught error in funcdefs_res"); - return Some(import_lines_file); + let import_struct_str = import_struct_str_res.expect("Uncaught error in import_struct_str_res"); + let prompt_str = format!("{}\nOutput -", &import_struct_str); + let import_path_opt = call_llm_api(prompt_str).await; + // deserialize output + if import_path_opt.is_none() { + log::debug!("[ImportIdentifier/get_import_path] Unable to call llm api"); + return None; + } + let import_path_str = import_path_opt.expect("Empty import_path_opt"); + let import_path_res = serde_json::from_str(&import_path_str); + if import_path_res.is_err() { + log::debug!( + "[ImportIdentifier/get_import_path] Unable to deserialize import path output : {:?}", + import_path_res.expect_err("Empty error in import_path_res")); + return None; } + let import_path: ImportPathOutput = import_path_res.expect("Unacaught error in import_path_res"); + return Some(import_path); } -} -async fn get_import_path_file(numbered_content: &Vec, import_line: FileImportLines, system_prompt: &str, file_path: &str) -> Option> { - let mut import_paths = Vec::::new(); - // get import lines from numbered lines - let import_lines_str_opt = numbered_import_lines(numbered_content, import_line); - if import_lines_str_opt.is_none() { - log::error!("[get_import_path_file] Unable to get numbered import line"); - return None; - } - let import_lines_str_chunks = import_lines_str_opt.expect("Empty import_lines_str_opt"); - for import_lines_chunk in import_lines_str_chunks { - let llm_req = LlmImportPathRequest{ - input: LlmImportPathInput { - language: "rust".to_string(), - file_path: file_path.to_string(), - import_lines: import_lines_chunk - } - }; - let llm_req_res = serde_json::to_string(&llm_req); - if llm_req_res.is_err() { - log::error!("[get_import_path_file] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); + pub async fn get_import_path_file(&mut self, file_path: &str, lang: &str, func_name: &str) -> Option { + let file_contents_res = std::fs::read_to_string(file_path); + if file_contents_res.is_err() { + let e = file_contents_res.expect_err("Empty error in file_content_res"); + log::error!("[get_import_lines] Unable to read file: {:?}, error: {:?}", file_path, e); return None; } - let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); - let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", - system_prompt, llm_req_prompt); - match call_llm_api(prompt).await { - None => { - log::error!("[get_import_path_file] Failed to call LLM API"); - return None; - } - Some(llm_response) => { - let import_res = serde_json::from_str(&llm_response); - if import_res.is_err() { - log::error!( - "[get_import_path_file] funcdefs error: {}", - import_res.expect_err("Empty error in funcdefs_res")); - continue; - } - let import_path: ImportPaths = import_res.expect("Uncaught error in funcdefs_res"); - import_paths.push(import_path); + let file_contents = file_contents_res.expect("Uncaught error in file_content_res"); + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(20); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + let import_path_opt = self.get_import_path(func_name, lang, file_path, &chunk_str).await; + if import_path_opt.is_some() { + return import_path_opt; } } - } - if import_paths.is_empty() { return None; } - let import_path_vec: Vec = import_paths - .iter() - .flat_map(|ip| ip.imports.iter().cloned()) - .collect(); - return Some(import_path_vec); } -fn numbered_import_lines(numbered_content: &Vec, import_line: FileImportLines) -> Option>{ - let mut chunks = Vec::new(); - let mut chunk = String::new(); - let mut line_count = 0; +// async fn get_import_path_file(chunk: &Vec, func_name: &str, lang: &str, file_path: &str) -> Option> { + + + +// let mut import_paths = Vec::::new(); +// // get import lines from numbered lines +// let import_lines_str_opt = numbered_import_lines(numbered_content, import_line); +// if import_lines_str_opt.is_none() { +// log::error!("[get_import_path_file] Unable to get numbered import line"); +// return None; +// } +// let import_lines_str_chunks = import_lines_str_opt.expect("Empty import_lines_str_opt"); +// for import_lines_chunk in import_lines_str_chunks { +// let llm_req = LlmImportPathRequest{ +// input: LlmImportPathInput { +// language: "rust".to_string(), +// file_path: file_path.to_string(), +// import_lines: import_lines_chunk +// } +// }; +// let llm_req_res = serde_json::to_string(&llm_req); +// if llm_req_res.is_err() { +// log::error!("[get_import_path_file] Error in serializing llm req: {}", llm_req_res.expect_err("Empty error in llm_req_res")); +// return None; +// } +// let llm_req_prompt = llm_req_res.expect("Uncaught error in llm_req_res"); +// let prompt = format!("{}\n\n### User Message\nInput -\n{}\nOutput -", +// system_prompt, llm_req_prompt); +// match call_llm_api(prompt).await { +// None => { +// log::error!("[get_import_path_file] Failed to call LLM API"); +// return None; +// } +// Some(llm_response) => { +// let import_res = serde_json::from_str(&llm_response); +// if import_res.is_err() { +// log::error!( +// "[get_import_path_file] funcdefs error: {}", +// import_res.expect_err("Empty error in funcdefs_res")); +// continue; +// } +// let import_path: ImportPaths = import_res.expect("Uncaught error in funcdefs_res"); +// import_paths.push(import_path); +// } +// } +// } +// if import_paths.is_empty() { +// return None; +// } +// let import_path_vec: Vec = import_paths +// .iter() +// .flat_map(|ip| ip.imports.iter().cloned()) +// .collect(); +// return Some(import_path_vec); +// } - for line in import_line.lines { - if line_count == 30 { - chunks.push(chunk.clone()); - chunk = String::new(); - line_count = 0; - } - chunk += &numbered_content[line as usize]; - line_count += 1; - } +// fn numbered_import_lines(numbered_content: &Vec, import_line: FileImportLines) -> Option>{ +// let mut chunks = Vec::new(); +// let mut chunk = String::new(); +// let mut line_count = 0; - // Push the last chunk if it's not empty - if !chunk.is_empty() { - chunks.push(chunk); - } +// for line in import_line.lines { +// if line_count == 10 { +// chunks.push(chunk.clone()); +// chunk = String::new(); +// line_count = 0; +// } +// chunk += &numbered_content[line as usize]; +// line_count += 1; +// } - if chunks.is_empty() { - return None; - } - Some(chunks) -} \ No newline at end of file +// // Push the last chunk if it's not empty +// if !chunk.is_empty() { +// chunks.push(chunk); +// } + +// if chunks.is_empty() { +// return None; +// } +// Some(chunks) +// } \ No newline at end of file diff --git a/vibi-dpu/src/graph/function_call.rs b/vibi-dpu/src/graph/function_call.rs index dd433d16..751a3f77 100644 --- a/vibi-dpu/src/graph/function_call.rs +++ b/vibi-dpu/src/graph/function_call.rs @@ -1,8 +1,9 @@ use std::{collections::HashMap, path::{Path, PathBuf}}; use serde::{Deserialize, Serialize}; +use serde_json::json; -use super::{file_imports::get_import_lines, gitops::HunkDiffMap, utils::{call_llm_api, numbered_content, read_file}}; +use super::{gitops::HunkDiffMap, utils::{call_llm_api, numbered_content, read_file}}; #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionCallChunk { @@ -139,4 +140,148 @@ pub async fn function_calls_in_file(filepath: &PathBuf, func_name: &str) -> Opti // return None; // } // return Some(file_func_call_map); -// } \ No newline at end of file +// } +#[derive(Serialize, Deserialize, Debug)] +struct InputSchema { + code_chunk: String, + language: String, + file_path: String, +} + +// Structure for function calls in the output schema +#[derive(Serialize, Deserialize, Debug)] +struct FunctionCall { + line_number: u32, + function_name: String, +} + +// Output schema structure +#[derive(Serialize, Deserialize, Debug)] +struct FunctionCallsOutput { + function_calls: Vec, + notes: Option, +} + +// Instruction structure +#[derive(Serialize, Deserialize, Debug)] +struct Instructions { + input_schema: InputSchemaDescription, + output_schema: OutputSchemaDescription, + task_description: String, +} + +// Description of input schema +#[derive(Serialize, Deserialize, Debug)] +struct InputSchemaDescription { + code_chunk: String, + language: String, + file_path: String, +} + +// Description of output schema +#[derive(Serialize, Deserialize, Debug)] +struct OutputSchemaDescription { + function_calls: Vec, + notes: String, +} + +// Description for each function call in output +#[derive(Serialize, Deserialize, Debug)] +struct FunctionCallDescription { + line_number: String, + function_name: String, +} + +// Complete structure for JSON input and output +#[derive(Serialize, Deserialize, Debug)] +struct JsonStructure { + instructions: Instructions, + sample_input: InputSchema, + expected_output: FunctionCallsOutput, + input: Option, +} + +impl JsonStructure { + fn set_input(&mut self, input: InputSchema) { + self.input = Some(input); + } +} + +pub struct FunctionCallIdentifier { + prompt: JsonStructure +} + +impl FunctionCallIdentifier { + pub fn new() -> Option { + let system_prompt_opt = read_file("/app/prompts/prompt_function_call"); + if system_prompt_opt.is_none() { + log::error!("[function_calls_in_chunk] Unable to read prompt_function_call"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let prompt_json_res = serde_json::from_str(&system_prompt_lines); + if prompt_json_res.is_err() { + log::error!("[FunctionCallIdentifier/new] Unable to deserialize prompt_json: {:?}", + prompt_json_res.expect("Empty bprompt_json_res")); + return None; + } + let prompt_json: JsonStructure = prompt_json_res.expect("Empty error in prompt_json_res"); + return Some(Self { prompt: prompt_json}); + } + + pub async fn functions_in_file(&mut self, filepath: &PathBuf, lang: &str) -> Option { + // concatenate functioncallsoutput for all chunks + let mut all_func_calls: FunctionCallsOutput = FunctionCallsOutput{ function_calls: vec![], notes: None }; + // TODO + let file_contents_res = std::fs::read_to_string(filepath.clone()); + if file_contents_res.is_err() { + log::error!( + "[FunctionCallIdentifier/functions_in_file] Unable to read file: {:?}, error: {:?}", + &filepath, file_contents_res.expect_err("Empty error in file_contents_res") + ); + return None; + } + let file_contents = file_contents_res.expect("Uncaught error in file_contents_res"); + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(50); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + if let Some(mut func_calls) = self.functions_in_chunk(&chunk_str, filepath, lang).await { + all_func_calls.function_calls.append(&mut func_calls.function_calls); + } + } + if all_func_calls.function_calls.is_empty() { + return None; + } + return Some(all_func_calls); + } + + pub async fn functions_in_chunk(&mut self, chunk: &str, filepath: &PathBuf, lang: &str) -> Option { + let input = InputSchema{ code_chunk: chunk.to_string(), language: lang.to_string(), + file_path: filepath.to_str().expect("Empty filepath").to_string() }; + self.prompt.input = Some(input); + let prompt_str_res = serde_json::to_string(&self.prompt); + if prompt_str_res.is_err() { + log::error!( + "[FunctionCallIdentifier/functions_in_chunk] Unable to serialize prompt: {:?}", + prompt_str_res.expect_err("Empty error in prompt_str_res")); + return None; + } + let prompt_str = prompt_str_res.expect("Uncaught error in prompt_str_res"); + let final_prompt = format!("{}\nOutput - ", &prompt_str); + let prompt_response_opt = call_llm_api(final_prompt).await; + if prompt_response_opt.is_none() { + log::error!("[FunctionCallIdentifier/functions_in_chunk] Unable to call llm for chunk: {:?}", chunk); + return None; + } + let prompt_response = prompt_response_opt.expect("Empty prompt_response_opt"); + let deserialized_response = serde_json::from_str(&prompt_response); + if deserialized_response.is_err() { + let e = deserialized_response.expect_err("Empty error in deserialized_response"); + log::error!("[FunctionCallIdentifier/functions_in_chunk] Error in deserializing response: {:?}", e); + return None; + } + let func_calls: FunctionCallsOutput = deserialized_response.expect("Empty error in deserialized_response"); + return Some(func_calls); + } +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index 239e5d87..5e8a8131 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -9,6 +9,10 @@ pub async fn graph_edges(base_filepaths: &Vec, head_filepaths: &Vec, review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + // filter files with ripgrep + // for each filtered file + // get func call + // get func def for (dest_filename, func_defs) in diff_graph.diff_func_defs() { let mut dest_file_rel = dest_filename.to_string(); if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { @@ -206,144 +210,150 @@ async fn outgoing_edges(base_filepaths: &Vec, head_filepaths: &Vec } impl FuncCall { - pub fn import_info(&self) -> &ImportPath { - &self.import_info - } + // pub fn import_info(&self) -> &ImportPath { + // &self.import_info + // } pub fn call_info(&self) -> &Vec { &self.call_info } - pub fn func_call_hunk_lines(&self, hunk_diff: &HunkDiffLines) -> Option { - let mut hunk_func_calls_lines = Vec::::new(); - for func_call in self.call_info() { - for call_line in func_call.function_calls() { - if hunk_diff.start_line() <= call_line && hunk_diff.end_line() >= call_line { - hunk_func_calls_lines.push(call_line.to_owned()); - } - } - } - if hunk_func_calls_lines.is_empty() { - return None; - } - let hunk_func_call = FuncCall{ - import_info: self.import_info.clone(), - call_info: vec![FunctionCallChunk::new(hunk_func_calls_lines, - self.import_info().imported().to_string())]}; - return Some(hunk_func_call); - } + // pub fn func_call_hunk_lines(&self, hunk_diff: &HunkDiffLines) -> Option { + // let mut hunk_func_calls_lines = Vec::::new(); + // for func_call in self.call_info() { + // for call_line in func_call.function_calls() { + // if hunk_diff.start_line() <= call_line && hunk_diff.end_line() >= call_line { + // hunk_func_calls_lines.push(call_line.to_owned()); + // } + // } + // } + // if hunk_func_calls_lines.is_empty() { + // return None; + // } + // let hunk_func_call = FuncCall{ + // import_info: self.import_info.clone(), + // call_info: vec![FunctionCallChunk::new(hunk_func_calls_lines, + // self.import_info().imported().to_string())]}; + // return Some(hunk_func_call); + // } - pub fn function_name(&self) -> &String { - self.import_info.imported() - } + // pub fn function_name(&self) -> &String { + // self.import_info.imported() + // } } #[derive(Debug, Default, Clone)] @@ -90,7 +90,7 @@ impl DiffFuncCall { #[derive(Debug, Default, Clone)] pub struct DiffGraph { diff_files_func_defs: AllFileFunctions, - diff_files_imports: FilesImportInfo, + // diff_files_imports: FilesImportInfo, diff_files_func_calls: HashMap>, diff_func_defs: HashMap, diff_func_calls: HashMap, @@ -109,9 +109,9 @@ impl DiffGraph { &self.diff_files_func_defs } - pub fn all_file_imports(&self) -> &FilesImportInfo { - &self.diff_files_imports - } + // pub fn all_file_imports(&self) -> &FilesImportInfo { + // &self.diff_files_imports + // } pub fn diff_func_defs(&self) -> &HashMap { &self.diff_func_defs @@ -131,7 +131,7 @@ impl DiffGraph { } } -pub async fn generate_diff_graph(diff_files: &Vec, review: &Review, base_commit_import_info: &FilesImportInfo) -> Option { +pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> Option { let diff_code_files_opt = source_diff_files(diff_files); if diff_code_files_opt.is_none() { log::debug!("[generate_diff_graph] No relevant source diff files in: {:#?}", diff_files); @@ -141,13 +141,14 @@ pub async fn generate_diff_graph(diff_files: &Vec, review: &Review, ba let hunk_diff_map = get_changed_hunk_lines(&diff_code_files, review); // get func defs for base commit for files in diff log::debug!("[generate_diff_graph] hunk diff map =======~~~~~~~~ {:#?}", &hunk_diff_map); - let diff_graph_opt = process_hunk_diff(&hunk_diff_map, base_commit_import_info, review).await; + let diff_graph_opt = process_hunk_diff(&hunk_diff_map, review).await; return diff_graph_opt; } -async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, base_commit_import_info: &FilesImportInfo,review: &Review) -> Option { +async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, review: &Review) -> Option { // full graph func def and import info for diff selected files is required. let all_diff_files = hunk_diff_map.all_files_pathbuf(review.clone_dir()); + // do generate function defs , only starting line let base_commit_func_defs_opt = generate_function_map(&all_diff_files).await; if base_commit_func_defs_opt.is_none() { log::debug!("[process_hunk_diff] Unable to generate func defs for base commit"); @@ -156,23 +157,23 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, base_commit_import_info: let base_commit_func_defs = base_commit_func_defs_opt.expect("Empty let base_commit_func_defs_opt"); git_checkout_commit(review, &review.pr_head_commit()); let diff_func_defs_opt = generate_function_map(&all_diff_files).await; - let diff_imports_opt = get_import_lines(&all_diff_files).await; + // let diff_imports_opt = get_import_lines(&all_diff_files).await; // TODO FIXME - opt logic if diff_func_defs_opt.is_none() { log::debug!("[process_hunk_diff] Unable to generate func definitions diff map"); return None; } - if diff_imports_opt.is_none() { - log::debug!("[process_hunk_diff] Unable to generate func imports diff map"); - return None; - } + // if diff_imports_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to generate func imports diff map"); + // return None; + // } let diff_files_func_defs = diff_func_defs_opt.expect("Empty all_file_func_defs_opt)"); - let diff_files_imports = diff_imports_opt.expect("Empty all_file_imports_opt"); - let diff_files_func_calls = diff_file_func_calls(&all_diff_files, &diff_files_imports, &diff_files_func_defs).await; + // let diff_files_imports = diff_imports_opt.expect("Empty all_file_imports_opt"); + let diff_files_func_calls = diff_file_func_calls(&all_diff_files, &diff_files_func_defs).await; let mut diff_graph = DiffGraph { diff_files_func_calls, diff_files_func_defs, - diff_files_imports, + // diff_files_imports, diff_func_defs: HashMap::new(), diff_func_calls: HashMap::new(), }; @@ -208,57 +209,57 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, base_commit_import_info: // find func call in hunks for each import // want to record not all func_calls but hunk specific line numbers // might need to reorder for loops to make sure repeated calcs are avoided - if let Some(imports_info) = diff_graph.all_file_imports().file_import_info(filename) { - for import_info in imports_info.all_import_paths() { - if let Some(func_call) = diff_graph.func_calls_for_func(import_info.imported(), filename) { - diff_func_calls_add.add_added_calls(func_call.to_owned()); - } - // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls - // TODO FIXME - need function call calc for all diff files, need to search for funcdefs as well as imports - // if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { - // // func_calls is basically all func calls of a function in the latest commit of the file - // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { - // let func_call = FuncCall{ import_info, call_info: func_calls }; - // for hunk_diff in file_line_map.added_hunks() { - // if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { - // diff_func_calls_add.add_added_calls(hunk_func_call); - // } - // } - // } - // } - } - } - // Use full graph's import info + // if let Some(imports_info) = diff_graph.all_file_imports().file_import_info(filename) { + // for import_info in imports_info.all_import_paths() { + // if let Some(func_call) = diff_graph.func_calls_for_func(import_info.imported(), filename) { + // diff_func_calls_add.add_added_calls(func_call.to_owned()); + // } + // // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls + // // TODO FIXME - need function call calc for all diff files, need to search for funcdefs as well as imports + // // if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { + // // // func_calls is basically all func calls of a function in the latest commit of the file + // // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + // // let func_call = FuncCall{ import_info, call_info: func_calls }; + // // for hunk_diff in file_line_map.added_hunks() { + // // if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { + // // diff_func_calls_add.add_added_calls(hunk_func_call); + // // } + // // } + // // } + // // } + // } + // } + // // Use full graph's import info // do a git checkout to base commit // do the same thing as done for added_calls } diff_graph.add_func_def(filename.to_string(), diff_func_defs); - diff_func_calls_map.insert(filename.to_string(), diff_func_calls_add); - } - git_checkout_commit(review, &review.base_head_commit()); - for filepath in &all_diff_files { - let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); - let diff_func_call_del = diff_func_calls_map.entry(filename.to_string()).or_insert(DiffFuncCall { added_calls: Vec::new(), deleted_calls: Vec::new() }); - if let Some(imports_info) = base_commit_import_info.file_import_info(filename) { - for import_info in imports_info.all_import_paths() { - // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls - if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { - // func_calls is basically all func calls of a function in the latest commit of the file - if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { - let func_call = FuncCall{ import_info, call_info: func_calls }; - for hunk_diff in file_line_map.deleted_hunks() { - if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { - diff_func_call_del.add_deleted_calls(hunk_func_call); - } - } - } - } - } - } - } - for (filename, diff_func_call) in diff_func_calls_map.iter() { - diff_graph.add_diff_func_calls(filename.to_owned(), diff_func_call.to_owned()); + // diff_func_calls_map.insert(filename.to_string(), diff_func_calls_add); } + // git_checkout_commit(review, &review.base_head_commit()); + // for filepath in &all_diff_files { + // let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); + // let diff_func_call_del = diff_func_calls_map.entry(filename.to_string()).or_insert(DiffFuncCall { added_calls: Vec::new(), deleted_calls: Vec::new() }); + // if let Some(imports_info) = base_commit_import_info.file_import_info(filename) { + // for import_info in imports_info.all_import_paths() { + // // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls + // if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { + // // func_calls is basically all func calls of a function in the latest commit of the file + // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + // let func_call = FuncCall{ import_info, call_info: func_calls }; + // for hunk_diff in file_line_map.deleted_hunks() { + // if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { + // diff_func_call_del.add_deleted_calls(hunk_func_call); + // } + // } + // } + // } + // } + // } + // } + // for (filename, diff_func_call) in diff_func_calls_map.iter() { + // diff_graph.add_diff_func_calls(filename.to_owned(), diff_func_call.to_owned()); + // } return Some(diff_graph); } diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index f30bd761..f6e8938e 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -1,7 +1,7 @@ use crate::{graph::{elements::MermaidGraphElements, graph_edges::graph_edges, graph_info::generate_diff_graph}, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; -use super::{file_imports::{get_import_lines, ChunkImportInfo, FileImportInfo, FileImportLines, FilesImportInfo, ImportPath}, function_call::FunctionCallChunk, function_line_range::{AllFileFunctions, FuncDefInfo, FunctionFileMap}, graph_info::{DiffFuncCall, DiffFuncDefs, DiffGraph, FuncCall}, utils::all_code_files}; +use super::{function_call::FunctionCallChunk, function_line_range::{AllFileFunctions, FuncDefInfo, FunctionFileMap}, graph_info::{DiffFuncCall, DiffFuncDefs, DiffGraph, FuncCall}, utils::all_code_files}; pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Review) -> Option { @@ -38,15 +38,8 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review return None; } let base_filepaths = base_filepaths_opt.expect("Empty base_filepaths_opt"); - let base_commit_import_info_opt = get_import_lines(&base_filepaths).await; - log::debug!("[generate_flowchart_elements] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ all_file_import_info_opt = {:#?}", &base_commit_import_info_opt); - if base_commit_import_info_opt.is_none() { - log::error!("[generate_flowchart_elements] Unable to get import info for source files: {:#?}", &base_filepaths); - return None; - } - let base_commit_import_info = base_commit_import_info_opt.expect("Empty import_lines_opt"); // let base_commit_import_info = get_test_import_info(); - let diff_graph_opt = generate_diff_graph(diff_files, review, &base_commit_import_info).await; + let diff_graph_opt = generate_diff_graph(diff_files, review).await; log::debug!("[generate_flowchart_elements] diff_graph_opt = {:#?}", &diff_graph_opt); if diff_graph_opt.is_none() { log::error!( From 6dba80703acc6fa18f6851715fc26fade82bf75d Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Thu, 10 Oct 2024 19:28:38 +0530 Subject: [PATCH 34/43] Implement looking up file path from import --- vibi-dpu/src/db/mod.rs | 3 +- vibi-dpu/src/graph/file_imports.rs | 18 +- vibi-dpu/src/graph/function_call.rs | 90 +++- vibi-dpu/src/graph/function_line_range.rs | 64 ++- vibi-dpu/src/graph/graph_edges.rs | 606 ++++++++++++++-------- vibi-dpu/src/graph/graph_info.rs | 174 ++++--- vibi-dpu/src/graph/mermaid_elements.rs | 19 +- 7 files changed, 646 insertions(+), 328 deletions(-) diff --git a/vibi-dpu/src/db/mod.rs b/vibi-dpu/src/db/mod.rs index 10e5bbd6..dff5cbd4 100644 --- a/vibi-dpu/src/db/mod.rs +++ b/vibi-dpu/src/db/mod.rs @@ -9,5 +9,4 @@ pub mod repo_config; pub mod prs; pub mod bitbucket; pub mod github; -pub mod aliases; -pub mod graph_info; \ No newline at end of file +pub mod aliases; \ No newline at end of file diff --git a/vibi-dpu/src/graph/file_imports.rs b/vibi-dpu/src/graph/file_imports.rs index 273639dc..e92f98ef 100644 --- a/vibi-dpu/src/graph/file_imports.rs +++ b/vibi-dpu/src/graph/file_imports.rs @@ -226,19 +226,31 @@ struct InputSchema { // Output schema structure for matching import #[derive(Serialize, Deserialize, Debug)] -struct MatchingImport { +pub struct MatchingImport { line_number: u32, import_statement: String, possible_file_path: String, } +impl MatchingImport { + pub fn possible_file_path(&self) -> &String { + &self.possible_file_path + } +} + // Full output schema structure #[derive(Serialize, Deserialize, Debug)] -struct ImportPathOutput { +pub struct ImportPathOutput { matching_import: MatchingImport, notes: Option, } +impl ImportPathOutput { + pub fn get_matching_import(&self) -> &MatchingImport { + &self.matching_import + } +} + // Instruction structure #[derive(Serialize, Deserialize, Debug)] struct Instructions { @@ -309,7 +321,7 @@ impl ImportIdentifier { prompt_struct: sys_prompt_struct }); } - pub async fn get_import_path(&mut self, func_name: &str, lang: &str, file_path: &str, chunk: &str) -> Option{ + async fn get_import_path(&mut self, func_name: &str, lang: &str, file_path: &str, chunk: &str) -> Option{ // create prompt let input_schema = InputSchema { function_name: func_name.to_string(), diff --git a/vibi-dpu/src/graph/function_call.rs b/vibi-dpu/src/graph/function_call.rs index 751a3f77..3cd6a608 100644 --- a/vibi-dpu/src/graph/function_call.rs +++ b/vibi-dpu/src/graph/function_call.rs @@ -1,9 +1,10 @@ -use std::{collections::HashMap, path::{Path, PathBuf}}; +use std::{collections::{HashMap, HashSet}, io::BufReader, path::{Path, PathBuf}, process::{Command, Stdio}}; use serde::{Deserialize, Serialize}; -use serde_json::json; +use std::io::BufRead; +use crate::utils::review::Review; -use super::{gitops::HunkDiffMap, utils::{call_llm_api, numbered_content, read_file}}; +use super::{gitops::{HunkDiffLines, HunkDiffMap}, utils::{call_llm_api, numbered_content, read_file}}; #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionCallChunk { @@ -149,19 +150,35 @@ struct InputSchema { } // Structure for function calls in the output schema -#[derive(Serialize, Deserialize, Debug)] -struct FunctionCall { +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct FunctionCall { line_number: u32, function_name: String, } +impl FunctionCall { + pub fn function_name(&self) -> &String { + &self.function_name + } + + pub fn line_number(&self) -> &u32 { + &self.line_number + } +} + // Output schema structure -#[derive(Serialize, Deserialize, Debug)] -struct FunctionCallsOutput { +#[derive(Default, Serialize, Deserialize, Debug, Clone)] +pub struct FunctionCallsOutput { function_calls: Vec, notes: Option, } +impl FunctionCallsOutput { + pub fn function_calls(&self) -> &Vec { + return &self.function_calls + } +} + // Instruction structure #[derive(Serialize, Deserialize, Debug)] struct Instructions { @@ -208,7 +225,8 @@ impl JsonStructure { } pub struct FunctionCallIdentifier { - prompt: JsonStructure + prompt: JsonStructure, + chunk_size: usize } impl FunctionCallIdentifier { @@ -226,13 +244,12 @@ impl FunctionCallIdentifier { return None; } let prompt_json: JsonStructure = prompt_json_res.expect("Empty error in prompt_json_res"); - return Some(Self { prompt: prompt_json}); + return Some(Self { prompt: prompt_json, chunk_size: 30}); } pub async fn functions_in_file(&mut self, filepath: &PathBuf, lang: &str) -> Option { // concatenate functioncallsoutput for all chunks let mut all_func_calls: FunctionCallsOutput = FunctionCallsOutput{ function_calls: vec![], notes: None }; - // TODO let file_contents_res = std::fs::read_to_string(filepath.clone()); if file_contents_res.is_err() { log::error!( @@ -256,7 +273,7 @@ impl FunctionCallIdentifier { return Some(all_func_calls); } - pub async fn functions_in_chunk(&mut self, chunk: &str, filepath: &PathBuf, lang: &str) -> Option { + async fn functions_in_chunk(&mut self, chunk: &str, filepath: &PathBuf, lang: &str) -> Option { let input = InputSchema{ code_chunk: chunk.to_string(), language: lang.to_string(), file_path: filepath.to_str().expect("Empty filepath").to_string() }; self.prompt.input = Some(input); @@ -284,4 +301,55 @@ impl FunctionCallIdentifier { let func_calls: FunctionCallsOutput = deserialized_response.expect("Empty error in deserialized_response"); return Some(func_calls); } + + pub async fn function_calls_in_hunks(&mut self, filepath: &PathBuf, lang: &str, diff_hunks: &Vec) -> Option { + let func_calls_opt = self.functions_in_file(filepath, lang).await; + if func_calls_opt.is_none() { + log::debug!("[FunctionCallIdentifier/function_calls_in_hunks] No func calls in {:?}", filepath); + return None; + } + let mut func_calls = func_calls_opt.expect("Empty func_calls_opt"); + func_calls.function_calls.retain(|function_call| { + // Check if the function call's line number is outside of any hunk diff ranges + !diff_hunks.iter().any(|hunk| { + function_call.line_number >= *hunk.start_line() as u32 && function_call.line_number <= *hunk.end_line() as u32 + }) + }); + return Some(func_calls); + } +} + +pub fn function_calls_search(review: &Review, function_name: &str) -> Option>{ + let pattern = format!(r"{}\([^\)]*\)", function_name); // Regex pattern for the specific function call + let directory = review.clone_dir(); // The directory to search in (current directory here) + + // Spawn the ripgrep process, adding `-l` for filenames and `--absolute-path` for absolute paths + let rg_command_res = Command::new("rg") + .arg("--absolute-path") // Print absolute file paths + .arg("-l") // Print only filenames that contain matches + .arg("-e") // Use regular expression + .arg(pattern) // The regex pattern for function calls + .arg(directory) // Directory to search + .stdout(Stdio::piped()) // Pipe the output + .spawn(); // Spawn the ripgrep process + if rg_command_res.is_err() { + log::error!("[function_calls_search] error in rg command: {:?}", + rg_command_res.expect_err("Empty error in rg_command_res")); + return None; + } + let rg_command = rg_command_res.expect("Uncaught error in rg_command_res"); + // Capture the stdout of ripgrep + let stdout = rg_command.stdout.expect("Failed to capture stdout"); + let reader = BufReader::new(stdout); + + // Use a HashSet to avoid duplicate filenames + let mut files: HashSet = HashSet::new(); + + // Read the output line by line + for line in reader.lines() { + if let Ok(file) = line { // Each line is an absolute filename with a match + files.insert(file); + } + } + return Some(files); } \ No newline at end of file diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs index 1ea40039..e8067905 100644 --- a/vibi-dpu/src/graph/function_line_range.rs +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -1,11 +1,10 @@ use std::{collections::HashMap, path::PathBuf}; use serde::{Deserialize, Serialize}; -use walkdir::WalkDir; -use crate::{graph::utils::numbered_content, utils::review::Review}; +use crate::graph::utils::numbered_content; -use super::{gitops::HunkDiffLines, utils::{all_code_files, call_llm_api, read_file}}; +use super::{function_call::FunctionCall, gitops::HunkDiffLines, utils::{call_llm_api, read_file}}; #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FuncDefInfo { @@ -39,6 +38,18 @@ impl FuncDefInfo { } } +#[derive(Debug, Default, Clone)] +pub struct HunkFuncDef { + func_def: FuncDefInfo, + hunk_info: HunkDiffLines +} + +impl HunkFuncDef { + pub fn func_def(&self) -> &FuncDefInfo { + &self.func_def + } +} + #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionFileMap { pub(crate) file_name: String, @@ -65,31 +76,44 @@ impl FunctionFileMap { |f| f.line_start <= line_num && line_num <= f.line_end) } - pub fn funcs_in_hunk(&self, hunk: &HunkDiffLines) -> Vec { - self.functions + pub fn funcs_in_hunk(&self, hunk: &HunkDiffLines) -> Vec { + let hunk_func_vec: Vec = self.functions .iter() - .filter(|func| { + .filter_map(|func| { // Check if the function's start or end line falls within the hunk's start and end line range - (func.line_start() >= hunk.start_line() && func.line_start() <= hunk.end_line()) || + if (func.line_start() >= hunk.start_line() && func.line_start() <= hunk.end_line()) || (func.line_end() >= hunk.start_line() && func.line_end() <= hunk.end_line()) || // Additionally check if the function completely spans over the hunk range (func.line_start() <= hunk.start_line() && func.line_end() >= hunk.end_line()) - }).cloned() - .collect() + { + let hunkfuncdef = HunkFuncDef { + func_def: func.clone(), + hunk_info: hunk.clone(), + }; + return Some(hunkfuncdef); + } + return None; + }).collect(); + return hunk_func_vec; } - pub fn funcs_for_lines(&self, lines: &Vec) -> HashMap { - let mut line_funcdef_map = HashMap::new(); - - for line in lines { - for func in &self.functions { - if func.line_start <= *line && *line <= func.line_end { - line_funcdef_map.entry(*line).or_insert(func.clone()); - } - } - } - return line_funcdef_map; + pub fn funcs_for_func_call(&self, func_call: &FunctionCall) -> Option<&FuncDefInfo>{ + let line_num = func_call.line_number().to_owned() as usize; + return self.func_at_line(line_num); } + + // pub fn funcs_for_lines(&self, lines: &Vec) -> HashMap { + // let mut line_funcdef_map = HashMap::new(); + + // for line in lines { + // for func in &self.functions { + // if func.line_start <= *line && *line <= func.line_end { + // line_funcdef_map.entry(*line).or_insert(func.clone()); + // } + // } + // } + // return line_funcdef_map; + // } } #[derive(Debug, Serialize, Default, Deserialize, Clone)] diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index 5e8a8131..2f3414c5 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -1,219 +1,257 @@ -use std::{path::{Path, PathBuf}, str::FromStr}; -use crate::{graph::utils::match_imported_filename_to_path, utils::{gitops::git_checkout_commit, review::Review}}; +use std::path::{Path, PathBuf}; +use crate::utils::{gitops::git_checkout_commit, review::Review}; -use super::{elements::MermaidGraphElements, file_imports::{FilesImportInfo, ImportPath}, function_call::function_calls_in_file, function_line_range::{generate_function_map, FuncDefInfo, FunctionFileMap}, graph_info::DiffGraph, utils::{absolute_to_relative_path, match_overlap}}; +use super::{elements::MermaidGraphElements, file_imports::ImportIdentifier, function_call::{function_calls_search, FunctionCallIdentifier}, function_line_range::generate_function_map, graph_info::DiffGraph, utils::absolute_to_relative_path}; -pub async fn graph_edges(base_filepaths: &Vec, head_filepaths: &Vec, review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { - outgoing_edges(base_filepaths, head_filepaths, diff_graph, graph_elems, review).await; - incoming_edges(head_filepaths, review, all_import_info, diff_graph, graph_elems).await; +pub async fn graph_edges(base_filepaths: &Vec, review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, lang: &str) { + outgoing_edges(base_filepaths, diff_graph, graph_elems, review, lang).await; + incoming_edges(review, diff_graph, graph_elems, lang).await; } -async fn incoming_edges(head_filepaths: &Vec, review: &Review, all_import_info: &FilesImportInfo, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { +async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, lang :&str) { // filter files with ripgrep // for each filtered file // get func call // get func def - for (dest_filename, func_defs) in diff_graph.diff_func_defs() { - let mut dest_file_rel = dest_filename.to_string(); - if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { - dest_file_rel = dest_file_relative_path; - } - for dest_func in func_defs.added_func_defs() { - git_checkout_commit(review, review.pr_head_commit()); - // search in diff graph - for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { - let mut source_rel_path = source_filename.to_string(); - if let Some(src_relative_filepath) = absolute_to_relative_path(&source_rel_path, review) { - source_rel_path = src_relative_filepath; - } - let file_imports = file_func_defs.all_import_paths(); - for file_import in file_imports { - // search for correct import - if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, &file_import.import_path()) { - if match_import_func(&file_import, dest_func) { - // find func call - let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); - // TODO, FIXME - function_calls_in_file should have src_filename or src_filepath? - check other calls to the function as well - if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { - // call func in that takes vec of lines and returns funcdefs - let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); - for (line_num, source_func_def) in source_func_defs { - if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("", - line_num.to_owned(), - &source_func_def.name(), - &dest_func.name(), - &source_rel_path, - &dest_file_rel, - "", - "green", - source_func_def.line_start(), - dest_func.line_start() - ); - } - } - } - } - } - } - } - git_checkout_commit(review, review.base_head_commit()); - // search in full graph - for (source_filename, file_func_defs) in all_import_info.file_import_map() { - let mut source_file_rel = source_filename.to_string(); - if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { - source_file_rel = src_relative_filepath; - } - let file_imports = file_func_defs.all_import_paths(); - for file_import in file_imports { - // search for correct import - if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { - if match_import_func(&file_import, dest_func) { - // if found, create edge - let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); - if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { - // call func in that takes vec of lines and returns funcdefs - let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); - if source_func_defs_opt.is_none() { - log::debug!("[incoming_edges] No funcs for file: {}", source_filename); - continue; - } - let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); - for (line_num, source_func_def) in source_func_defs { - if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("", - line_num.to_owned(), - &source_func_def.name(), - &dest_func.name(), - &source_file_rel, - &dest_file_rel, - "", - "green", - source_func_def.line_start(), - dest_func.line_start() - ); - } - } - } - } - } - } - } - } - for dest_func in func_defs.deleted_func_defs() { - // search in diff graph - for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { - let mut source_file_rel = source_filename.to_string(); - if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { - source_file_rel = src_relative_filepath; - } - let file_imports = file_func_defs.all_import_paths(); - for file_import in file_imports { - // search for correct import - if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { - if match_import_func(&file_import, dest_func) { - // find func call - git_checkout_commit(review, review.pr_head_commit()); - let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); - if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { - // call func in that takes vec of lines and returns funcdefs - let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); - if source_func_defs_opt.is_none() { - log::debug!("[incoming_edges] No funcs for file: {}", source_filename); - continue; - } - let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); - for (line_num, source_func_def) in source_func_defs { - if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("", - line_num.to_owned(), - &source_func_def.name(), - &dest_func.name(), - &source_file_rel, - &dest_file_rel, - "", - "red", - source_func_def.line_start(), - dest_func.line_start() - ); - } - } - } - } - } - } - } - // search in full graph - for (source_filename, file_func_defs) in all_import_info.file_import_map() { - let mut source_file_rel = source_filename.to_string(); - if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { - source_file_rel = src_relative_filepath; - } - let file_imports = file_func_defs.all_import_paths(); - for file_import in file_imports { - // search for correct import - if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { - if match_import_func(&file_import, dest_func) { - // if found, create edge - let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); - if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { - // call func in that takes vec of lines and returns funcdefs - let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); - if source_func_defs_opt.is_none() { - log::debug!("[incoming_edges] No funcs for file: {}", source_filename); - continue; - } - let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); - for (line_num, source_func_def) in source_func_defs { - if source_func_def != dest_func.to_owned() { - graph_elems.add_edge("red", - line_num.to_owned(), - &source_func_def.name(), - &dest_func.name(), - &source_file_rel, - &dest_file_rel, - "", - "red", - source_func_def.line_start(), - dest_func.line_start() - ); - } - } - } - } - } - } - } - } + // for (dest_filename, func_defs) in diff_graph.diff_func_defs() { + // let mut dest_file_rel = dest_filename.to_string(); + // if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { + // dest_file_rel = dest_file_relative_path; + // } + // let all_files: Vec<&String> = diff_graph.diff_func_defs().keys().collect(); + // for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { + // let mut source_rel_path = source_filename.to_string(); + // if let Some(src_relative_filepath) = absolute_to_relative_path(&source_rel_path, review) { + // source_rel_path = src_relative_filepath; + // } + // let file_imports = file_func_defs.all_import_paths(); + // for file_import in file_imports { + // // search for correct import + // if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, &file_import.import_path()) { + // if match_import_func(&file_import, dest_func) { + // // find func call + // let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // // TODO, FIXME - function_calls_in_file should have src_filename or src_filepath? - check other calls to the function as well + // if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // // call func in that takes vec of lines and returns funcdefs + // let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // let source_func_defs = diff_graph.all_file_func_defs().functions_in_file(source_filename).expect("No source filename found").funcs_for_lines(&lines); + // for (line_num, source_func_def) in source_func_defs { + // if source_func_def != dest_func.to_owned() { + // graph_elems.add_edge("", + // line_num.to_owned(), + // &source_func_def.name(), + // &dest_func.name(), + // &source_rel_path, + // &dest_file_rel, + // "", + // "green", + // source_func_def.line_start(), + // dest_func.line_start() + // ); + // } + // } + // } + // } + // } + // } + // } + + // // search in full graph + // for (source_filename, file_func_defs) in all_import_info.file_import_map() { + // let mut source_file_rel = source_filename.to_string(); + // if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + // source_file_rel = src_relative_filepath; + // } + // let file_imports = file_func_defs.all_import_paths(); + // for file_import in file_imports { + // // search for correct import + // if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + // if match_import_func(&file_import, dest_func) { + // // if found, create edge + // let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // // call func in that takes vec of lines and returns funcdefs + // let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + // if source_func_defs_opt.is_none() { + // log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + // continue; + // } + // let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); + // for (line_num, source_func_def) in source_func_defs { + // if source_func_def != dest_func.to_owned() { + // graph_elems.add_edge("", + // line_num.to_owned(), + // &source_func_def.name(), + // &dest_func.name(), + // &source_file_rel, + // &dest_file_rel, + // "", + // "green", + // source_func_def.line_start(), + // dest_func.line_start() + // ); + // } + // } + // } + // } + // } + // } + // } + // } + // for dest_func in func_defs.deleted_func_defs() { + // // search in diff graph + // for (source_filename, file_func_defs) in diff_graph.all_file_imports().file_import_map() { + // let mut source_file_rel = source_filename.to_string(); + // if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + // source_file_rel = src_relative_filepath; + // } + // let file_imports = file_func_defs.all_import_paths(); + // for file_import in file_imports { + // // search for correct import + // if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + // if match_import_func(&file_import, dest_func) { + // // find func call + // git_checkout_commit(review, review.pr_head_commit()); + // let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // // call func in that takes vec of lines and returns funcdefs + // let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + // if source_func_defs_opt.is_none() { + // log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + // continue; + // } + // let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); + // for (line_num, source_func_def) in source_func_defs { + // if source_func_def != dest_func.to_owned() { + // graph_elems.add_edge("", + // line_num.to_owned(), + // &source_func_def.name(), + // &dest_func.name(), + // &source_file_rel, + // &dest_file_rel, + // "", + // "red", + // source_func_def.line_start(), + // dest_func.line_start() + // ); + // } + // } + // } + // } + // } + // } + // } + // // search in full graph + // for (source_filename, file_func_defs) in all_import_info.file_import_map() { + // let mut source_file_rel = source_filename.to_string(); + // if let Some(src_relative_filepath) = absolute_to_relative_path(&source_file_rel, review) { + // source_file_rel = src_relative_filepath; + // } + // let file_imports = file_func_defs.all_import_paths(); + // for file_import in file_imports { + // // search for correct import + // if let Some(dest_filepath) = match_imported_filename_to_path(head_filepaths, file_import.import_path()) { + // if match_import_func(&file_import, dest_func) { + // // if found, create edge + // let src_filepath = PathBuf::from_str(source_filename).expect("Unable to create pathbuf"); + // if let Some(func_call_vec) = function_calls_in_file(&src_filepath, dest_func.name()).await { + // // call func in that takes vec of lines and returns funcdefs + // let lines = func_call_vec.iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); + // let source_func_defs_opt = diff_graph.all_file_func_defs().functions_in_file(source_filename); + // if source_func_defs_opt.is_none() { + // log::debug!("[incoming_edges] No funcs for file: {}", source_filename); + // continue; + // } + // let source_func_defs = source_func_defs_opt.expect("No source filename found").funcs_for_lines(&lines); + // for (line_num, source_func_def) in source_func_defs { + // if source_func_def != dest_func.to_owned() { + // graph_elems.add_edge("red", + // line_num.to_owned(), + // &source_func_def.name(), + // &dest_func.name(), + // &source_file_rel, + // &dest_file_rel, + // "", + // "red", + // source_func_def.line_start(), + // dest_func.line_start() + // ); + // } + // } + // } + // } + // } + // } + // } + // } + // } + let func_call_identifier_opt = FunctionCallIdentifier::new(); + if func_call_identifier_opt.is_none() { + log::error!("[incoming_edges] Unable to create new FunctionCallIdentifier"); + return; } + let mut func_call_identifier = func_call_identifier_opt.expect("Empty func_call_identifier_opt"); + git_checkout_commit(review, review.pr_head_commit()); + process_func_defs( + review, + diff_graph, + &mut func_call_identifier, + lang, + graph_elems, + "green" + ).await; + git_checkout_commit(review, review.base_head_commit()); + process_func_defs( + review, + diff_graph, + &mut func_call_identifier, + lang, + graph_elems, + "red" + ).await; } -fn match_import_func(import_obj: &ImportPath, dest_func_info: &FuncDefInfo) -> bool { - log::debug!("[match_import_condition] import_obj.imported = {}, dest_func_info = {:#?}", import_obj.imported(), dest_func_info); - // TODO FIXME - first condition doesn't make sense, it should always be true? - have to check for all calls of this function - match_overlap(&dest_func_info.name(), - &import_obj.imported(), - 0.6) - || match_overlap(&dest_func_info.parent(), - &import_obj.imported(), - 0.6) -} +// fn match_import_func(import_obj: &ImportPath, dest_func_info: &FuncDefInfo) -> bool { +// log::debug!("[match_import_condition] import_obj.imported = {}, dest_func_info = {:#?}", import_obj.imported(), dest_func_info); +// // TODO FIXME - first condition doesn't make sense, it should always be true? - have to check for all calls of this function +// match_overlap(&dest_func_info.name(), +// &import_obj.imported(), +// 0.6) +// || match_overlap(&dest_func_info.parent(), +// &import_obj.imported(), +// 0.6) +// } -async fn outgoing_edges(base_filepaths: &Vec, head_filepaths: &Vec, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, review: &Review) { +async fn outgoing_edges(base_filepaths: &Vec, diff_graph: &DiffGraph, + graph_elems: &mut MermaidGraphElements, review: &Review, lang: &str) +{ + let import_identifier_opt = ImportIdentifier::new(); + if import_identifier_opt.is_none() { + log::debug!("[outgoing_edges] Unable to create import identifier"); + return; + } + let mut import_identifier = import_identifier_opt.expect("EMpty import_identifier_opt"); + git_checkout_commit(review, review.pr_head_commit()); + process_func_calls( + &mut import_identifier, + lang, + review, + diff_graph, + base_filepaths, + graph_elems, + "green").await; git_checkout_commit(review, review.base_head_commit()); - for (source_filepath, func_calls) in diff_graph.diff_func_calls() { - let mut source_file_name = source_filepath.to_owned(); - if let Some(source_file) = absolute_to_relative_path(source_filepath, review){ - source_file_name = source_file.clone(); - } - - // get func calls - // get import and path - // get file + process_func_calls(&mut import_identifier, + lang, + review, + diff_graph, + base_filepaths, + graph_elems, + "red").await; + // get func def // for source_func_call in func_calls.added_calls() { @@ -355,5 +393,167 @@ async fn outgoing_edges(base_filepaths: &Vec, head_filepaths: &Vec, + graph_elems: &mut MermaidGraphElements, edge_color: &str) +{ + for (source_filepath, diff_func_calls) in diff_graph.diff_func_calls() { + let mut source_file_name = source_filepath.to_owned(); + // get func calls + if let Some(source_file) = absolute_to_relative_path(source_filepath, review) { + source_file_name = source_file.clone(); + } + let func_calls; + if edge_color == "green" { + func_calls = diff_func_calls.added_calls(); + } else { + func_calls = diff_func_calls.deleted_calls(); + } + for dest_func_call in func_calls.function_calls() { + if let Some(import_filepath) = import_identifier.get_import_path_file( + source_filepath, lang, dest_func_call.function_name()).await { + // get file + // get diffgraph all files and see if they contain filepath + let possible_diff_file_paths: Vec<&String> = diff_graph.all_file_func_defs().all_files().into_iter() + .filter(|file_path| file_path.contains(import_filepath.get_matching_import().possible_file_path())).collect(); + if possible_diff_file_paths.is_empty() { + // get all filepaths base or head or both and see contains among them + let possible_file_pathbufs: Vec<&PathBuf> = base_filepaths.iter() + .filter(|file_path| + file_path.to_string_lossy().contains(import_filepath.get_matching_import().possible_file_path())).collect(); + if !possible_file_pathbufs.is_empty() { + for possible_file_pathbuf in possible_file_pathbufs { + if let Some(func_defs) = diff_graph.all_file_func_defs() + .functions_in_file(&possible_file_pathbuf.to_string_lossy()) + { + for dest_func_def in func_defs.functions() { + if dest_func_def.name().contains(dest_func_call.function_name()) { + // find src func def + if let Some(file_func_map) = diff_graph.all_file_func_defs().functions_in_file(source_filepath) { + if let Some(src_func_def) = file_func_map.funcs_for_func_call(dest_func_call) { + // TODO - recheck colors logic + graph_elems.add_edge( + edge_color, + dest_func_call.line_number().to_owned() as usize, + src_func_def.name(), + dest_func_call.function_name(), + &source_file_name, + &possible_file_pathbuf.to_string_lossy(), + edge_color, + "", + src_func_def.line_start(), + dest_func_def.line_start()); + } + } + } + } + } + } + } + + } else { + // get file func defs + for possible_file_path in possible_diff_file_paths { + if let Some(func_defs) = diff_graph.all_file_func_defs() + .functions_in_file(possible_file_path) { + let possible_file_name; + let possible_file_name_opt = absolute_to_relative_path(possible_file_path, review); + if possible_file_name_opt.is_none() { + possible_file_name = possible_file_path.to_string(); + } else { + possible_file_name = possible_file_name_opt.expect("Empty possible_file_name_opt"); + } + + for dest_func_def in func_defs.functions() { + if dest_func_def.name().contains(dest_func_call.function_name()) { + // TODO - add edge + if let Some(file_func_map) = diff_graph.all_file_func_defs().functions_in_file(source_filepath) { + if let Some(src_func_def) = file_func_map.funcs_for_func_call(dest_func_call) { + // TODO - recheck colors logic + graph_elems.add_edge( + edge_color, + dest_func_call.line_number().to_owned() as usize, + src_func_def.name(), + dest_func_call.function_name(), + &source_file_name, + &possible_file_name, + edge_color, + "", + src_func_def.line_start(), + dest_func_def.line_start()); + } + } + } + } + } + } + } + } + } } + // get import and path +} + +async fn process_func_defs(review: &Review, + diff_graph: &DiffGraph, func_call_identifier: &mut FunctionCallIdentifier, + lang: &str, graph_elems: &mut MermaidGraphElements, edge_color: &str) +{ + for (dest_filename, diff_func_defs) in diff_graph.diff_func_defs() { + let mut dest_file_rel = dest_filename.to_string(); + if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { + dest_file_rel = dest_file_relative_path; + } + let func_defs; + if edge_color == "red" { + func_defs = diff_func_defs.deleted_func_defs(); + } else { + func_defs = diff_func_defs.added_func_defs(); + } + for dest_func in func_defs { + // filter files with ripgrep + if let Some(possible_filepaths) = function_calls_search(review, dest_func.func_def().name()) { + if possible_filepaths.is_empty() { + log::debug!("[incoming_edges] No files detected having function call"); + continue; + } + for possible_filepath in possible_filepaths { + if possible_filepath == *dest_filename { + continue; + } + let possible_path = Path::new(&possible_filepath); + let possible_pathbuf = possible_path.to_path_buf(); + // get func call + if let Some(func_calls) = func_call_identifier.functions_in_file(&possible_pathbuf, lang).await { + // get func def + if let Some(func_map) = generate_function_map(&vec![possible_pathbuf]).await { + for func_call in func_calls.function_calls() { + if let Some(func_file_map) = func_map.functions_in_file(&possible_filepath) { + // find correct func def + if let Some(src_func_def) = func_file_map.funcs_for_func_call(func_call) { + if let Some(source_filename) = absolute_to_relative_path(&possible_filepath, review) { + // add edge + + graph_elems.add_edge(edge_color, + func_call.line_number().to_owned() as usize, + func_call.function_name(), + dest_func.func_def().name(), + &source_filename, + &dest_file_rel, + "", + edge_color, + src_func_def.line_start(), + dest_func.func_def().line_start()); + } + } + } + } + } + } + } + } + } + } } \ No newline at end of file diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs index 44152a2c..fc4aa795 100644 --- a/vibi-dpu/src/graph/graph_info.rs +++ b/vibi-dpu/src/graph/graph_info.rs @@ -1,27 +1,27 @@ use std::{collections::HashMap, path::PathBuf}; -use crate::{core::diff_graph, graph::{function_line_range::generate_function_map}, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; -use super::{function_call::{function_calls_in_file, FunctionCallChunk}, function_line_range::{AllFileFunctions, FuncDefInfo}, gitops::{get_changed_hunk_lines, HunkDiffLines, HunkDiffMap}, utils::source_diff_files}; +use crate::{graph::function_line_range::generate_function_map, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; +use super::{function_call::{FunctionCallChunk, FunctionCallIdentifier, FunctionCallsOutput}, function_line_range::{AllFileFunctions, HunkFuncDef}, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::source_diff_files}; #[derive(Debug, Default, Clone)] pub struct DiffFuncDefs { - added_func_defs: Vec, - deleted_func_defs: Vec + added_func_defs: Vec, + deleted_func_defs: Vec } impl DiffFuncDefs { - pub fn extend_added_funcdefs(&mut self, add_funcdefs: Vec) { + pub fn extend_added_funcdefs(&mut self, add_funcdefs: Vec) { self.added_func_defs.extend(add_funcdefs); } - pub fn extend_deleted_funcdefs(&mut self, del_funcdefs: Vec) { + pub fn extend_deleted_funcdefs(&mut self, del_funcdefs: Vec) { self.deleted_func_defs.extend(del_funcdefs); } - pub fn added_func_defs(&self) -> &Vec { + pub fn added_func_defs(&self) -> &Vec { &self.added_func_defs } - pub fn deleted_func_defs(&self) -> &Vec { + pub fn deleted_func_defs(&self) -> &Vec { &self.deleted_func_defs } } @@ -65,24 +65,24 @@ impl FuncCall { #[derive(Debug, Default, Clone)] pub struct DiffFuncCall { - added_calls: Vec, - deleted_calls: Vec + added_calls: FunctionCallsOutput, + deleted_calls: FunctionCallsOutput } impl DiffFuncCall { - pub fn add_added_calls(&mut self, add_calls: FuncCall) { - self.added_calls.push(add_calls); - } + // pub fn add_added_calls(&mut self, add_calls: FuncCall) { + // self.added_calls.push(add_calls); + // } - pub fn add_deleted_calls(&mut self, del_calls: FuncCall) { - self.deleted_calls.push(del_calls); - } + // pub fn add_deleted_calls(&mut self, del_calls: FuncCall) { + // self.deleted_calls.push(del_calls); + // } - pub fn added_calls(&self) -> &Vec { + pub fn added_calls(&self) -> &FunctionCallsOutput { &self.added_calls } - pub fn deleted_calls(&self) -> &Vec { + pub fn deleted_calls(&self) -> &FunctionCallsOutput { &self.deleted_calls } } @@ -91,7 +91,7 @@ impl DiffFuncCall { pub struct DiffGraph { diff_files_func_defs: AllFileFunctions, // diff_files_imports: FilesImportInfo, - diff_files_func_calls: HashMap>, + diff_files_func_calls: HashMap, diff_func_defs: HashMap, diff_func_calls: HashMap, } @@ -121,14 +121,14 @@ impl DiffGraph { &self.diff_func_calls } - pub fn func_calls_for_func(&self, function_name: &str, filename: &str) -> Option<&FuncCall> { - if let Some(func_call_map) = self.diff_files_func_calls.get(filename) { - if let Some(func_call) = func_call_map.get(function_name) { - return Some(func_call) - } - } - return None; - } + // pub fn func_calls_for_func(&self, function_name: &str, filename: &str) -> Option<&FuncCall> { + // if let Some(func_call_map) = self.diff_files_func_calls.get(filename) { + // if let Some(func_call) = func_call_map.get(function_name) { + // return Some(func_call) + // } + // } + // return None; + // } } pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> Option { @@ -155,6 +155,12 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, review: &Review) -> Opti return None; } let base_commit_func_defs = base_commit_func_defs_opt.expect("Empty let base_commit_func_defs_opt"); + let base_func_calls_opt = diff_file_func_calls(&all_diff_files, hunk_diff_map, false).await; + if base_func_calls_opt.is_none() { + log::debug!("[process_hunk_diff] Unable to calculate diff_file_func_calls"); + return None; + } + let base_func_calls = base_func_calls_opt.expect("Empty base_func_calls_opt"); git_checkout_commit(review, &review.pr_head_commit()); let diff_func_defs_opt = generate_function_map(&all_diff_files).await; // let diff_imports_opt = get_import_lines(&all_diff_files).await; @@ -163,27 +169,33 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, review: &Review) -> Opti log::debug!("[process_hunk_diff] Unable to generate func definitions diff map"); return None; } - // if diff_imports_opt.is_none() { - // log::debug!("[process_hunk_diff] Unable to generate func imports diff map"); - // return None; - // } let diff_files_func_defs = diff_func_defs_opt.expect("Empty all_file_func_defs_opt)"); - // let diff_files_imports = diff_imports_opt.expect("Empty all_file_imports_opt"); - let diff_files_func_calls = diff_file_func_calls(&all_diff_files, &diff_files_func_defs).await; + let diff_files_func_calls_opt = diff_file_func_calls(&all_diff_files, hunk_diff_map, true).await; + if diff_files_func_calls_opt.is_none() { + log::debug!("[process_hunk_diff] Unable to calculate diff_file_func_calls"); + return None; + } + let diff_files_func_calls = diff_files_func_calls_opt.expect("Empty diff_files_func_calls_opt"); let mut diff_graph = DiffGraph { - diff_files_func_calls, + diff_files_func_calls: diff_files_func_calls.clone(), diff_files_func_defs, // diff_files_imports, diff_func_defs: HashMap::new(), diff_func_calls: HashMap::new(), }; - let mut diff_func_calls_map: HashMap = HashMap::new(); for filepath in &all_diff_files { let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); let mut diff_func_defs = DiffFuncDefs { added_func_defs: Vec::new(), deleted_func_defs: Vec::new()}; - let mut diff_func_calls_add = DiffFuncCall { - added_calls: Vec::new(), deleted_calls: Vec::new()}; + // define base and diff func calls output for this filename + if let Some(base_func_call) = base_func_calls.get(filename) { + if let Some(diff_func_call) = diff_files_func_calls.get(filename) { + // initialize and add DiffFuncCall to diff_func_calls_map + let func_calls = DiffFuncCall { + added_calls: diff_func_call.to_owned(), deleted_calls: base_func_call.to_owned()}; + diff_graph.add_diff_func_calls(filename.to_string(), func_calls); + } + }; if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { for hunk_diff in file_line_map.added_hunks() { if let Some(funcs_map) = diff_graph.all_file_func_defs().functions_in_file(filename) { @@ -239,59 +251,61 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, review: &Review) -> Opti // git_checkout_commit(review, &review.base_head_commit()); // for filepath in &all_diff_files { // let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); - // let diff_func_call_del = diff_func_calls_map.entry(filename.to_string()).or_insert(DiffFuncCall { added_calls: Vec::new(), deleted_calls: Vec::new() }); - // if let Some(imports_info) = base_commit_import_info.file_import_info(filename) { - // for import_info in imports_info.all_import_paths() { - // // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls - // if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { - // // func_calls is basically all func calls of a function in the latest commit of the file - // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { - // let func_call = FuncCall{ import_info, call_info: func_calls }; - // for hunk_diff in file_line_map.deleted_hunks() { - // if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { - // diff_func_call_del.add_deleted_calls(hunk_func_call); - // } - // } - // } - // } - // } - // } + // let diff_func_call = diff_func_calls_map.entry(filename.to_string()).or_insert(DiffFuncCall { added_calls: Vec::new(), deleted_calls: Vec::new() }); + + // // if let Some(imports_info) = base_commit_import_info.file_import_info(filename) { + // // for import_info in imports_info.all_import_paths() { + // // // todo fixme - finding all func calls in file needs a different approach to add added and deleted calls + // // if let Some(func_calls) = function_calls_in_file(&filepath, import_info.imported()).await { + // // // func_calls is basically all func calls of a function in the latest commit of the file + // // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + // // let func_call = FuncCall{ import_info, call_info: func_calls }; + // // for hunk_diff in file_line_map.deleted_hunks() { + // // if let Some(hunk_func_call) = func_call.func_call_hunk_lines(&hunk_diff) { + // // diff_func_call_del.add_deleted_calls(hunk_func_call); + // // } + // // } + // // } + // // } + // // } + // // } // } - // for (filename, diff_func_call) in diff_func_calls_map.iter() { + // // for (filename, diff_func_call) in diff_func_calls_map.iter() { // diff_graph.add_diff_func_calls(filename.to_owned(), diff_func_call.to_owned()); // } return Some(diff_graph); } -async fn diff_file_func_calls(all_diff_files: &Vec, diff_imports: &FilesImportInfo, diff_file_funcs: &AllFileFunctions) -> HashMap>{ +async fn diff_file_func_calls(all_diff_files: &Vec, hunk_diff_map: &HunkDiffMap, added: bool) -> Option> { + // func calls made in diff hunks for all diff files let mut func_call_file_map = HashMap::new(); + let func_call_identifier_opt = FunctionCallIdentifier::new(); + if func_call_identifier_opt.is_none() { + log::error!("[diff_file_func_calls] Unable to create FunctionCallIdentifier"); + return None; + } + let mut func_call_identifier = func_call_identifier_opt.expect("Empty func_call_identifier_opt"); for filepathbuf in all_diff_files { let filepath = filepathbuf.to_str().expect("Unable to deserialize pathbuf"); - let mut func_call_map = HashMap::::new(); - // search using imports - if let Some(imports_info) = diff_imports.file_import_info(filepath) { - for import_info in imports_info.all_import_paths() { - if let Some(func_calls) = function_calls_in_file( - &filepathbuf, import_info.imported()).await { - let func_call = FuncCall{ import_info, call_info: func_calls }; - func_call_map.insert( - func_call.function_name().to_string(), func_call); - } - } + let hunk_diffs_opt = hunk_diff_map.file_hunks(filepath); + if hunk_diffs_opt.is_none() { + log::debug!("[diff_file_func_calls] No entry in hunk_diff_map for {}", filepath); + continue; } - // search in func defs - if let Some(func_def_map) = diff_file_funcs.functions_in_file(filepath) { - for func_def in func_def_map.functions() { - if let Some(func_calls) = function_calls_in_file( - &filepathbuf, func_def.name()).await { - let fake_import = ImportPath::new( 0, filepath.to_string(), func_def.name().to_string()); - let func_call = FuncCall{import_info: fake_import, call_info: func_calls}; - func_call_map.insert( - func_call.function_name().to_string(), func_call); - } - } + let hunk_diffs = hunk_diffs_opt.expect("Empty hunk_diffs_opt"); + let file_hunks; + if added { + file_hunks = hunk_diffs.added_hunks(); + } else { + file_hunks = hunk_diffs.deleted_hunks(); + } + let func_calls_opt = func_call_identifier.function_calls_in_hunks(filepathbuf, "rust", file_hunks).await; + if func_calls_opt.is_none() { + log::debug!("[diff_file_func_calls] No function calls in hunks: {}, {:?}", filepath, hunk_diffs); + continue; } - func_call_file_map.insert(filepath.to_string(), func_call_map); + let func_calls = func_calls_opt.expect("Empty func_calls_opt"); + func_call_file_map.insert(filepath.to_string(), func_calls); } - return func_call_file_map; + return Some(func_call_file_map); } \ No newline at end of file diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index f6e8938e..4d9c823a 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -50,16 +50,17 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review let diff_graph = diff_graph_opt.expect("Empty diff_graph_opt"); // let diff_graph = get_test_diff_graph(); // let diff_info = generate_diff_info(&full_graph, &diff_graph); + // git_checkout_commit(review, review.pr_head_commit()); + // let head_filepaths_opt = all_code_files(review.clone_dir()); + // if head_filepaths_opt.is_none() { + // log::error!( + // "[generate_flowchart_elements] Unable to get file paths: {}", review.clone_dir()); + // return None; + // } + // let head_filepaths = head_filepaths_opt.expect("Empty head_filepaths_opt"); let mut graph_elems = MermaidGraphElements::new(); - git_checkout_commit(review, review.pr_head_commit()); - let head_filepaths_opt = all_code_files(review.clone_dir()); - if head_filepaths_opt.is_none() { - log::error!( - "[generate_flowchart_elements] Unable to get file paths: {}", review.clone_dir()); - return None; - } - let head_filepaths = head_filepaths_opt.expect("Empty head_filepaths_opt"); - graph_edges(&base_filepaths, &head_filepaths, review, &base_commit_import_info, &diff_graph, &mut graph_elems).await; + let lang = "rust"; + graph_edges(&base_filepaths, review, &diff_graph, &mut graph_elems, lang).await; let elems_str = graph_elems.render_elements(review); return Some(elems_str); } \ No newline at end of file From 509486673bcedac67ab099e4bbcc6fbf7055e99a Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Thu, 10 Oct 2024 19:29:50 +0530 Subject: [PATCH 35/43] Add prompts and ripgrep to dockerfile --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index bea22e25..c4e3ff04 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ FROM ubuntu:latest # # Install dependencies required by the application RUN \ apt-get update && \ - apt-get install ca-certificates git -y && \ + apt-get install ca-certificates git ripgrep -y && \ apt-get clean ADD http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb /tmp RUN chmod a+x /tmp/libssl1.1_1.1.1f-1ubuntu2_amd64.deb && \ @@ -45,7 +45,7 @@ ENV PROVIDER=$PROVIDER COPY ./vibi-dpu/target/debug/vibi-dpu /app/vibi-dpu COPY ./pubsub-sa.json /app/pubsub-sa.json COPY ./repo-profiler.pem /app/repo-profiler.pem - +COPY ./prompts /app/prompts # Create directory for configuration RUN mkdir /app/config From 2d410690d565e0616919c51389058ae3a8568a0d Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Fri, 11 Oct 2024 20:56:16 +0530 Subject: [PATCH 36/43] Implement looking at one func def at one time --- vibi-dpu/src/graph/function_call.rs | 50 ++- vibi-dpu/src/graph/function_line_range.rs | 188 +++++++++- vibi-dpu/src/graph/gitops.rs | 93 ++++- vibi-dpu/src/graph/graph_edges.rs | 404 ++++++++-------------- vibi-dpu/src/graph/graph_info.rs | 280 ++++++++------- vibi-dpu/src/utils/gitops.rs | 1 + 6 files changed, 609 insertions(+), 407 deletions(-) diff --git a/vibi-dpu/src/graph/function_call.rs b/vibi-dpu/src/graph/function_call.rs index 3cd6a608..083dbddc 100644 --- a/vibi-dpu/src/graph/function_call.rs +++ b/vibi-dpu/src/graph/function_call.rs @@ -231,16 +231,16 @@ pub struct FunctionCallIdentifier { impl FunctionCallIdentifier { pub fn new() -> Option { - let system_prompt_opt = read_file("/app/prompts/prompt_function_call"); + let system_prompt_opt = read_file("/app/prompts/prompt_function_calls"); if system_prompt_opt.is_none() { - log::error!("[function_calls_in_chunk] Unable to read prompt_function_call"); + log::error!("[function_calls_in_chunk] Unable to read prompt_function_calls"); return None; } let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); let prompt_json_res = serde_json::from_str(&system_prompt_lines); if prompt_json_res.is_err() { log::error!("[FunctionCallIdentifier/new] Unable to deserialize prompt_json: {:?}", - prompt_json_res.expect("Empty bprompt_json_res")); + prompt_json_res.expect("Empty prompt_json_res")); return None; } let prompt_json: JsonStructure = prompt_json_res.expect("Empty error in prompt_json_res"); @@ -302,21 +302,47 @@ impl FunctionCallIdentifier { return Some(func_calls); } - pub async fn function_calls_in_hunks(&mut self, filepath: &PathBuf, lang: &str, diff_hunks: &Vec) -> Option { + pub async fn function_calls_in_hunks(&mut self, filepath: &PathBuf, lang: &str, diff_hunks: &Vec) -> Option> { let func_calls_opt = self.functions_in_file(filepath, lang).await; + if func_calls_opt.is_none() { log::debug!("[FunctionCallIdentifier/function_calls_in_hunks] No func calls in {:?}", filepath); return None; } - let mut func_calls = func_calls_opt.expect("Empty func_calls_opt"); - func_calls.function_calls.retain(|function_call| { - // Check if the function call's line number is outside of any hunk diff ranges - !diff_hunks.iter().any(|hunk| { - function_call.line_number >= *hunk.start_line() as u32 && function_call.line_number <= *hunk.end_line() as u32 - }) - }); - return Some(func_calls); + + let func_calls = func_calls_opt.expect("Empty func_calls_opt"); + + // Create a vector to store the result (HunkDiffLines, FunctionCallsOutput) tuples + let mut hunk_func_pairs: Vec<(HunkDiffLines, FunctionCallsOutput)> = Vec::new(); + + // For each hunk, find matching function calls + for hunk in diff_hunks { + // Collect function calls within this hunk's line range + let matching_func_calls: Vec = func_calls + .function_calls + .iter() + .filter(|function_call| { + function_call.line_number >= *hunk.start_line() as u32 && function_call.line_number <= *hunk.end_line() as u32 + }) + .cloned() // Clone the function calls so we can move them into the new FunctionCallsOutput + .collect(); + + // If there are any matching function calls, create a FunctionCallsOutput and pair it with the hunk + if !matching_func_calls.is_empty() { + let mut matching_func_calls_output = func_calls.clone(); + matching_func_calls_output.function_calls = matching_func_calls; + + hunk_func_pairs.push((hunk.clone(), matching_func_calls_output)); + } + } + + if hunk_func_pairs.is_empty() { + None + } else { + Some(hunk_func_pairs) + } } + } pub fn function_calls_search(review: &Review, function_name: &str) -> Option>{ diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs index e8067905..e2aefc45 100644 --- a/vibi-dpu/src/graph/function_line_range.rs +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -50,6 +50,132 @@ impl HunkFuncDef { } } +// Struct to represent function definition +#[derive(Serialize, Deserialize, Debug, Default, Clone)] +struct FunctionDefinition { + line_number: i32, +} + +// Struct to represent the output schema +#[derive(Serialize, Deserialize, Debug)] +pub struct FunctionDefOutput { + function_definition: Option, + notes: Option, +} + +impl FunctionDefOutput { + pub fn get_function_line_number(&self) -> Option { + if let Some(func_def) = &self.function_definition { + return Some(func_def.line_number as usize) + } + return None; + } +} + +// Struct to represent the input schema +#[derive(Serialize, Deserialize, Debug)] +struct InputSchema { + code_chunk: String, + language: String, + function_name: String, +} + +// Struct for instructions that hold input/output schemas +#[derive(Serialize, Deserialize, Debug)] +struct Instructions { + input_schema: InputSchema, + output_schema: FunctionDefOutput, + task_description: String, +} +// Struct for the entire JSON prompt +#[derive(Serialize, Deserialize, Debug)] +struct FunctionDefPrompt { + instructions: Instructions, + sample_input: InputSchema, + expected_output: FunctionDefOutput, + input: Option, +} + +impl FunctionDefPrompt { + fn set_input(&mut self, input: InputSchema) { + self.input = Some(input); + } +} + +pub struct FunctionDefIdentifier { + prompt: FunctionDefPrompt +} + +impl FunctionDefIdentifier { + pub fn new() -> Option { + let system_prompt_opt = read_file("/app/prompts/prompt_function_def"); + if system_prompt_opt.is_none() { + log::error!("[function_calls_in_chunk] Unable to read prompt_function_def"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let prompt_json_res = serde_json::from_str(&system_prompt_lines); + if prompt_json_res.is_err() { + log::error!("[FunctionCallIdentifier/new] Unable to deserialize prompt_json: {:?}", + prompt_json_res.expect("Empty prompt_json_res")); + return None; + } + let prompt_json: FunctionDefPrompt = prompt_json_res.expect("Empty error in prompt_json_res"); + return Some(Self { prompt: prompt_json}); + } + + pub async fn function_defs_in_file(&mut self, filepath: &PathBuf, lang: &str, function_name: &str) -> Option { + // concatenate functioncallsoutput for all chunks + let file_contents_res = std::fs::read_to_string(filepath.clone()); + if file_contents_res.is_err() { + log::error!( + "[FunctionCallIdentifier/functions_in_file] Unable to read file: {:?}, error: {:?}", + &filepath, file_contents_res.expect_err("Empty error in file_contents_res") + ); + return None; + } + let file_contents = file_contents_res.expect("Uncaught error in file_contents_res"); + let numbered_content = numbered_content(file_contents); + let chunks = numbered_content.chunks(50); + for chunk in chunks { + let chunk_str = chunk.join("\n"); + if let Some(func_defs) = self.function_defs_in_chunk(&chunk_str, filepath, lang, function_name).await { + return Some(func_defs); + } + } + return None; + } + + async fn function_defs_in_chunk(&mut self, chunk: &str, filepath: &PathBuf, lang: &str, function_name: &str) -> Option { + let input = InputSchema{ code_chunk: chunk.to_string(), language: lang.to_string(), + function_name: function_name.to_string() }; + self.prompt.input = Some(input); + let prompt_str_res = serde_json::to_string(&self.prompt); + if prompt_str_res.is_err() { + log::error!( + "[FunctionCallIdentifier/functions_in_chunk] Unable to serialize prompt: {:?}", + prompt_str_res.expect_err("Empty error in prompt_str_res")); + return None; + } + let prompt_str = prompt_str_res.expect("Uncaught error in prompt_str_res"); + let final_prompt = format!("{}\nOutput - ", &prompt_str); + let prompt_response_opt = call_llm_api(final_prompt).await; + if prompt_response_opt.is_none() { + log::error!("[FunctionCallIdentifier/functions_in_chunk] Unable to call llm for chunk: {:?}", chunk); + return None; + } + let prompt_response = prompt_response_opt.expect("Empty prompt_response_opt"); + let deserialized_response = serde_json::from_str(&prompt_response); + if deserialized_response.is_err() { + let e = deserialized_response.expect_err("Empty error in deserialized_response"); + log::error!("[FunctionCallIdentifier/functions_in_chunk] Error in deserializing response: {:?}", e); + return None; + } + let func_calls: FunctionDefOutput = deserialized_response.expect("Empty error in deserialized_response"); + return Some(func_calls); + } +} + #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionFileMap { pub(crate) file_name: String, @@ -144,7 +270,7 @@ struct LlmFuncDefRequest { } #[derive(Debug, Serialize, Default, Deserialize, Clone)] -struct LlmFuncDef { +pub struct LlmFuncDef { #[serde(default)] name: String, #[serde(default)] @@ -152,12 +278,28 @@ struct LlmFuncDef { #[serde(default)] parent: String } + +impl LlmFuncDef { + pub fn name(&self) -> &String { + &self.name + } + + pub fn line_start(&self) -> &usize { + &self.line_start + } +} #[derive(Debug, Serialize, Default, Deserialize, Clone)] struct LlmFuncDefResponse { #[serde(default)] functions: Vec } +impl LlmFuncDefResponse { + pub fn functions(&self) -> &Vec { + &self.functions + } +} + #[derive(Debug, Serialize, Default, Deserialize, Clone)] struct LlmFuncBoundaryInput { language: String, @@ -238,6 +380,50 @@ pub async fn generate_function_map(file_paths: &Vec) -> Option Option { + let system_prompt_opt = read_file("/app/prompts/prompt_function_lines"); + if system_prompt_opt.is_none() { + log::error!("[get_function_def_for_func_call] Unable to read prompt_function_lines"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let file_contents_res = std::fs::read_to_string(filepath.clone()); + if file_contents_res.is_err() { + log::error!("[get_function_def_for_func_call] Error in reading file contents: {:?}", + file_contents_res.expect_err("Empty error")); + return None; + } + let file_contents = file_contents_res.expect("Uncaught error in file_content_res"); + let numbered_content = numbered_content(file_contents); + let mut current_line = func_call_line_num; + let chunk_size = 30; + // Loop until we reach the beginning of the file + while current_line > 0 { + // Determine the start and end for the current chunk + let start = if current_line >= chunk_size { + current_line - chunk_size + } else { + 0 + }; + + // Extract the chunk + let chunk_str: String = numbered_content[start..=current_line].join("\n"); + // Process the chunk + let function_defs_opt = get_function_defs_in_chunk(&chunk_str, &system_prompt_lines).await; + if function_defs_opt.is_none() { + log::error!("[generate_function_map] Unable to get functions from llm"); + continue; + } + let function_defs = function_defs_opt.expect("Empty function_defs"); + if let Some(func_def) = function_defs.functions().first() { + return Some(func_def.to_owned()); + } + // Move the current line up by the chunk size + current_line = start; + } + return None; +} + async fn get_function_defs_in_chunk(chunk: &str, system_prompt: &str) -> Option { let llm_req = LlmFuncDefRequest { input: LlmFuncDefInput { diff --git a/vibi-dpu/src/graph/gitops.rs b/vibi-dpu/src/graph/gitops.rs index 1be89b7d..920673b8 100644 --- a/vibi-dpu/src/graph/gitops.rs +++ b/vibi-dpu/src/graph/gitops.rs @@ -6,6 +6,8 @@ use crate::utils::{gitops::StatItem, review::Review}; pub struct HunkDiffLines { start_line: usize, end_line: usize, + function_line: Option, + line_number: Option } impl HunkDiffLines { @@ -16,6 +18,18 @@ impl HunkDiffLines { pub fn end_line(&self) -> &usize { &self.end_line } + + pub fn function_line(&self) -> &Option { + &self.function_line + } + + pub fn line_number(&self) -> &Option { + &self.line_number + } + + pub fn set_line_number(&mut self, line_number: Option) { + self.line_number = line_number; + } } #[derive(Debug, Default, Clone)] @@ -32,6 +46,33 @@ impl FileHunks { pub fn added_hunks(&self) -> &Vec { &self.added_hunks } + + // Mutable references to allow modification of the hunks + pub fn deleted_hunks_mut(&mut self) -> &mut Vec { + &mut self.deleted_hunks + } + + pub fn added_hunks_mut(&mut self) -> &mut Vec { + &mut self.added_hunks + } + + pub fn is_func_in_hunks(&self, function_name: &str) -> &Option { + for hunk_lines in self.added_hunks() { + if let Some(func_raw) = hunk_lines.function_line() { + if func_raw.contains(function_name) { + return hunk_lines.line_number(); + } + } + } + for hunk_lines in self.deleted_hunks() { + if let Some(func_raw) = hunk_lines.function_line() { + if func_raw.contains(function_name) { + return hunk_lines.line_number(); + } + } + } + return &None; + } } #[derive(Debug, Default, Clone)] @@ -44,6 +85,10 @@ impl HunkDiffMap { &self.file_line_map } + pub fn file_line_map_mut(&mut self) -> &mut HashMap { + &mut self.file_line_map + } + pub fn all_files(&self) -> Vec<&String> { self.file_line_map.keys().collect::>() } @@ -65,7 +110,7 @@ impl HunkDiffMap { } pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> HunkDiffMap { - let mut file_hunk_map = HunkDiffMap{file_line_map: HashMap::new()}; + let mut file_hunk_map = HunkDiffMap { file_line_map: HashMap::new() }; let prev_commit = review.base_head_commit(); let curr_commit = review.pr_head_commit(); let clone_dir = review.clone_dir(); @@ -74,26 +119,32 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu let filepath = item.filepath.as_str(); let commit_range = format!("{}...{}", prev_commit, curr_commit); log::debug!("[extract_hunks] | clone_dir = {:?}, filepath = {:?}", clone_dir, filepath); + let output_res = Command::new("git") .arg("diff") .arg("--unified=0") + .arg("--ignore-space-change") .arg(&commit_range) .arg(&filepath) .current_dir(clone_dir) .output(); + if output_res.is_err() { let commanderr = output_res.expect_err("No error in output_res"); log::error!("[extract_hunks] git diff command failed to start : {:?}", commanderr); continue; } + let result = output_res.expect("Uncaught error in output_res"); let diff = result.stdout; - let diffstr_res = str::from_utf8(&diff); + let diffstr_res = std::str::from_utf8(&diff); + if diffstr_res.is_err() { let e = diffstr_res.expect_err("No error in diffstr_res"); log::error!("[extract_hunks] Unable to deserialize diff: {:?}", e); continue; } + let diffstr = diffstr_res.expect("Uncaught error in diffstr_res"); log::debug!("[extract_hunks] diffstr = {}", &diffstr); @@ -103,7 +154,13 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu let mut current_del_end = 0; let mut in_add_hunk = false; let mut in_del_hunk = false; - let mut file_hunks = FileHunks {deleted_hunks: Vec::new(), added_hunks: Vec::new()}; + let mut file_hunks = FileHunks { + deleted_hunks: Vec::new(), + added_hunks: Vec::new(), + }; + + // Variable to store the function line + let mut function_line: Option = None; for line in diffstr.lines() { if line.starts_with("@@") { @@ -112,23 +169,36 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu file_hunks.added_hunks.push(HunkDiffLines { start_line: current_add_start, end_line: current_add_end, + function_line: function_line.clone(), // Use the function line stored + line_number: None }); } if in_del_hunk { file_hunks.deleted_hunks.push(HunkDiffLines { start_line: current_del_start, end_line: current_del_end, + function_line: function_line.clone(), // Use the function line stored + line_number: None }); } + // Reset states for next hunk in_add_hunk = false; in_del_hunk = false; + // Extract the function name or any string after the last @@ let parts: Vec<&str> = line.split_whitespace().collect(); if parts.len() > 2 { - let del_hunk = parts[1]; - let add_hunk = parts[2]; + function_line = Some(parts[2].to_string()); // Store the function line here + } else { + function_line = None; // Reset if no valid function line found + } + // Determine the start and end lines for the hunks + let del_hunk = parts.get(1); + let add_hunk = parts.get(2); + + if let Some(del_hunk) = del_hunk { if del_hunk.starts_with('-') { if let Some((start, len)) = parse_hunk_range(del_hunk) { current_del_start = start; @@ -136,7 +206,9 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu in_del_hunk = true; } } + } + if let Some(add_hunk) = add_hunk { if add_hunk.starts_with('+') { if let Some((start, len)) = parse_hunk_range(add_hunk) { current_add_start = start; @@ -148,29 +220,36 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu } } - // Push the last hunks + // Push the last hunks if still in any hunk if in_add_hunk { file_hunks.added_hunks.push(HunkDiffLines { start_line: current_add_start, end_line: current_add_end, + function_line: function_line.clone(), // Use the function line stored + line_number: None }); } if in_del_hunk { file_hunks.deleted_hunks.push(HunkDiffLines { start_line: current_del_start, end_line: current_del_end, + function_line: function_line.clone(), // Use the function line stored + line_number: None }); } + let abs_filepath = Path::new(review.clone_dir()); let abs_file_pathbuf = abs_filepath.join(Path::new(filepath)); file_hunk_map.file_line_map.insert( abs_file_pathbuf.to_str().expect("Unable to deserialize pathbuf").to_string(), - file_hunks); + file_hunks, + ); } return file_hunk_map; } + fn parse_hunk_range(hunk: &str) -> Option<(usize, usize)> { let hunk = hunk.trim_start_matches(&['-', '+'][..]); let parts: Vec<&str> = hunk.split(',').collect(); diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index 2f3414c5..99cfddd2 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -1,7 +1,7 @@ use std::path::{Path, PathBuf}; use crate::utils::{gitops::git_checkout_commit, review::Review}; -use super::{elements::MermaidGraphElements, file_imports::ImportIdentifier, function_call::{function_calls_search, FunctionCallIdentifier}, function_line_range::generate_function_map, graph_info::DiffGraph, utils::absolute_to_relative_path}; +use super::{elements::MermaidGraphElements, file_imports::ImportIdentifier, function_call::{function_calls_search, FunctionCallIdentifier}, function_line_range::{generate_function_map, get_function_def_for_func_call, FunctionDefIdentifier}, graph_info::DiffGraph, utils::absolute_to_relative_path}; pub async fn graph_edges(base_filepaths: &Vec, review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, lang: &str) { outgoing_edges(base_filepaths, diff_graph, graph_elems, review, lang).await; @@ -188,7 +188,13 @@ async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &m // } // } // } - let func_call_identifier_opt = FunctionCallIdentifier::new(); + let func_def_identifier_opt = FunctionDefIdentifier::new(); + if func_def_identifier_opt.is_none() { + log::debug!("[outgoing_edges] Unable to create func def identifier"); + return; + } + let mut funcdef_identifier = func_def_identifier_opt.expect("Empty func_def_identifier_opt"); + let mut func_call_identifier_opt = FunctionCallIdentifier::new(); if func_call_identifier_opt.is_none() { log::error!("[incoming_edges] Unable to create new FunctionCallIdentifier"); return; @@ -197,6 +203,7 @@ async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &m git_checkout_commit(review, review.pr_head_commit()); process_func_defs( review, + &mut funcdef_identifier, diff_graph, &mut func_call_identifier, lang, @@ -206,6 +213,7 @@ async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &m git_checkout_commit(review, review.base_head_commit()); process_func_defs( review, + &mut funcdef_identifier, diff_graph, &mut func_call_identifier, lang, @@ -228,15 +236,29 @@ async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &m async fn outgoing_edges(base_filepaths: &Vec, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, review: &Review, lang: &str) { + let func_call_identifier_opt = FunctionCallIdentifier::new(); + if func_call_identifier_opt.is_none() { + log::error!("[incoming_edges] Unable to create new FunctionCallIdentifier"); + return; + } + let mut func_call_identifier = func_call_identifier_opt.expect("Empty func_call_identifier_opt"); let import_identifier_opt = ImportIdentifier::new(); if import_identifier_opt.is_none() { log::debug!("[outgoing_edges] Unable to create import identifier"); return; } - let mut import_identifier = import_identifier_opt.expect("EMpty import_identifier_opt"); + let mut import_identifier = import_identifier_opt.expect("Empty import_identifier_opt"); + let func_def_identifier_opt = FunctionDefIdentifier::new(); + if func_def_identifier_opt.is_none() { + log::debug!("[outgoing_edges] Unable to create func def identifier"); + return; + } + let mut funcdef_identifier = func_def_identifier_opt.expect("Empty func_def_identifier_opt"); git_checkout_commit(review, review.pr_head_commit()); process_func_calls( &mut import_identifier, + &mut func_call_identifier, + &mut funcdef_identifier, lang, review, diff_graph, @@ -245,308 +267,167 @@ async fn outgoing_edges(base_filepaths: &Vec, diff_graph: &DiffGraph, "green").await; git_checkout_commit(review, review.base_head_commit()); process_func_calls(&mut import_identifier, + &mut func_call_identifier, + &mut funcdef_identifier, lang, review, diff_graph, base_filepaths, graph_elems, "red").await; - - // get func def - - // for source_func_call in func_calls.added_calls() { - // log::debug!("[outgoing_edges] source func call import info = {:#?}", source_func_call.import_info()); - // // todo fixme - normalize dest_filename - // let dest_filename = source_func_call.import_info().import_path(); - // let lines = source_func_call.call_info().iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - // // send this file for getting func defs - // // search in diff graph - // let diff_file_funcdefs = diff_graph.all_file_func_defs(); - // // identify this particular func - // if let Some(func_defs) = diff_file_funcdefs.functions_in_file(dest_filename) { - // let source_func_defs = func_defs.funcs_for_lines(&lines); - // for dest_func_def in func_defs.functions() { - // if match_import_func( source_func_call.import_info(), dest_func_def) { - // // add edge - // log::debug!("[outgoing_edges] Adding edge"); - // for (line_num, source_func_def) in &source_func_defs { - // graph_elems.add_edge("green", - // line_num.to_owned(), - // source_func_def.name(), - // dest_func_def.name(), - // &source_file_name, - // dest_filename, - // "green", - // "", - // source_func_def.line_start(), - // dest_func_def.line_start() - // ); - // } - // } - // } - // } - // // search in full graph - // let dest_filepath_opt = match_imported_filename_to_path(base_filepaths, dest_filename); - // if dest_filepath_opt.is_none() { - // log::error!("[outgoing_edges] Unable to find filename in all paths: {}", dest_filename); - // continue; - // } - // let dest_filepath = dest_filepath_opt.expect("EMpty dest_filepath_opt"); - // if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath.clone()]).await { - // // identify this particular func - // let dest_filepath_key = dest_filepath.as_os_str().to_str().expect("Unable to deserialize dest_filepath"); - // let mut dest_file_rel = dest_filepath_key.to_string(); - // if let Some(dest_relative_filepath) = absolute_to_relative_path(&dest_file_rel, review) { - // dest_file_rel = dest_relative_filepath; - // } - // if let Some(func_defs) = all_file_funcdefs.functions_in_file(dest_filepath_key) { - // let source_func_defs = func_defs.funcs_for_lines(&lines); - // for dest_func_def in func_defs.functions() { - // if match_import_func(source_func_call.import_info(), dest_func_def) { - // // add edge - // for (line_num, source_func_def) in &source_func_defs { - // graph_elems.add_edge("green", - // line_num.to_owned(), - // source_func_def.name(), - // dest_func_def.name(), - // &source_file_name, - // &dest_file_rel, - // "green", - // "", - // source_func_def.line_start(), - // dest_func_def.line_start() - // ); - // } - // } - // } - // } - // } - // } - // // do same for deleted_calls - // for source_func_call in func_calls.deleted_calls() { - // log::debug!("[outgoing_edges] source func call import info = {:#?}", source_func_call.import_info()); - // // todo fixme - normalize dest_filename - // let dest_filename = source_func_call.import_info().import_path(); - // let diff_file_funcdefs = diff_graph.all_file_func_defs(); - // let lines = source_func_call.call_info().iter().flat_map(|chunk| chunk.function_calls()).cloned().collect(); - // // identify this particular func - // if let Some(func_defs) = diff_file_funcdefs.functions_in_file(dest_filename) { - // let source_func_defs = func_defs.funcs_for_lines(&lines); - // for dest_func_def in func_defs.functions() { - // if match_import_func(source_func_call.import_info(), dest_func_def) { - // // add edge - // for (line_num, source_func_def) in &source_func_defs { - // graph_elems.add_edge("red", - // line_num.to_owned(), - // source_func_def.name(), - // dest_func_def.name(), - // &source_file_name, - // dest_filename, - // "red", - // "", - // source_func_def.line_start(), - // dest_func_def.line_start() - // ); - // } - // } - // } - // } - // // send this file for getting func defs - // let dest_filepath_opt = match_imported_filename_to_path(base_filepaths, dest_filename); - // if dest_filepath_opt.is_none() { - // log::error!("[outgoing_edges] Unable to find filename in all paths: {}", dest_filename); - // continue; - // } - // let dest_filepath = dest_filepath_opt.expect("EMpty dest_filepath_opt"); - // if let Some(all_file_funcdefs) = generate_function_map(&vec![dest_filepath.clone()]).await { - // // identify this particular func - // if let Some(src_file_funcs) = diff_graph.all_file_func_defs().functions_in_file(source_filepath) { - // let dest_filepath_key = dest_filepath.as_os_str().to_str().expect("Unable to deserialize dest_filepath"); - // if let Some(dest_func_defs) = all_file_funcdefs.functions_in_file(dest_filepath_key) { - // let mut rel_dest_filepath = dest_filepath_key.to_string(); - // if let Some(dest_file) = absolute_to_relative_path(dest_filepath_key, review){ - // rel_dest_filepath = dest_file.clone(); - // } - // // TODO FIXME - func_defs is for dest, we need it for src file, check other places as well to fix this - // let source_func_defs = src_file_funcs.funcs_for_lines(&lines); - // log::debug!("[outgoing_edges] lines = {:?}, source_func_defs = {:#?} dest_func_defs = {:#?}", &lines, &source_func_defs, &dest_func_defs); - // for dest_func_def in dest_func_defs.functions() { - // if match_import_func(source_func_call.import_info(), dest_func_def) { - // // add edge - // for (line_num, source_func_def) in &source_func_defs { - // log::debug!("[outgoing_edges] Adding edge for deleted func in full_graph"); - // graph_elems.add_edge("red", - // line_num.to_owned(), - // source_func_def.name(), - // dest_func_def.name(), - // &source_file_name, - // &rel_dest_filepath, - // "red", - // "", - // source_func_def.line_start(), - // dest_func_def.line_start() - // ); - // } - // } - // } - // } - // } - // } - // } - // } } -async fn process_func_calls(import_identifier: &mut ImportIdentifier, lang: &str, - review: &Review, diff_graph: &DiffGraph, base_filepaths: &Vec, +async fn process_func_calls(import_identifier: &mut ImportIdentifier, func_call_identifier: &mut FunctionCallIdentifier, + funcdef_identifier: &mut FunctionDefIdentifier, + lang: &str, review: &Review, diff_graph: &DiffGraph, base_filepaths: &Vec, graph_elems: &mut MermaidGraphElements, edge_color: &str) { - for (source_filepath, diff_func_calls) in diff_graph.diff_func_calls() { + for (source_filepath, src_file_hunks) in diff_graph.hunk_diff_map().file_line_map() { let mut source_file_name = source_filepath.to_owned(); // get func calls if let Some(source_file) = absolute_to_relative_path(source_filepath, review) { source_file_name = source_file.clone(); } - let func_calls; + let diff_hunks; if edge_color == "green" { - func_calls = diff_func_calls.added_calls(); + diff_hunks = src_file_hunks.added_hunks(); } else { - func_calls = diff_func_calls.deleted_calls(); + diff_hunks = src_file_hunks.deleted_hunks(); } - for dest_func_call in func_calls.function_calls() { - if let Some(import_filepath) = import_identifier.get_import_path_file( - source_filepath, lang, dest_func_call.function_name()).await { - // get file - // get diffgraph all files and see if they contain filepath - let possible_diff_file_paths: Vec<&String> = diff_graph.all_file_func_defs().all_files().into_iter() - .filter(|file_path| file_path.contains(import_filepath.get_matching_import().possible_file_path())).collect(); - if possible_diff_file_paths.is_empty() { - // get all filepaths base or head or both and see contains among them - let possible_file_pathbufs: Vec<&PathBuf> = base_filepaths.iter() - .filter(|file_path| - file_path.to_string_lossy().contains(import_filepath.get_matching_import().possible_file_path())).collect(); - if !possible_file_pathbufs.is_empty() { - for possible_file_pathbuf in possible_file_pathbufs { - if let Some(func_defs) = diff_graph.all_file_func_defs() - .functions_in_file(&possible_file_pathbuf.to_string_lossy()) - { - for dest_func_def in func_defs.functions() { - if dest_func_def.name().contains(dest_func_call.function_name()) { - // find src func def - if let Some(file_func_map) = diff_graph.all_file_func_defs().functions_in_file(source_filepath) { - if let Some(src_func_def) = file_func_map.funcs_for_func_call(dest_func_call) { - // TODO - recheck colors logic - graph_elems.add_edge( - edge_color, - dest_func_call.line_number().to_owned() as usize, - src_func_def.name(), - dest_func_call.function_name(), - &source_file_name, - &possible_file_pathbuf.to_string_lossy(), - edge_color, - "", - src_func_def.line_start(), - dest_func_def.line_start()); + let source_file_path = Path::new(source_filepath); + let source_file_pathbuf = source_file_path.to_path_buf(); + if let Some(hunk_func_calls) = func_call_identifier. + function_calls_in_hunks(&source_file_pathbuf, lang, diff_hunks).await { + for (hunk_lines, func_call_output) in hunk_func_calls { + for dest_func_call in func_call_output.function_calls() { + if let Some(import_filepath) = import_identifier.get_import_path_file( + source_filepath, lang, dest_func_call.function_name()).await { + // get file + // get diffgraph all files and see if they contain filepath + let possible_diff_file_paths: Vec<&String> = diff_graph.hunk_diff_map().all_files().into_iter() + .filter(|file_path| file_path.contains(import_filepath.get_matching_import().possible_file_path())).collect(); + if !possible_diff_file_paths.is_empty() { + for possible_diff_file_path in possible_diff_file_paths { + if diff_graph.hunk_diff_map().all_files().contains(&possible_diff_file_path) + { + let hunks_for_func = diff_graph.hunk_diff_map().file_line_map() + .get(possible_diff_file_path).expect("Empty entry in file_line_map"); + if let Some(possible_file_rel) = absolute_to_relative_path(possible_diff_file_path, review) { + if let Some(dest_func_def_line) = hunks_for_func.is_func_in_hunks(dest_func_call.function_name()) { + if let Some(src_func_name) = hunk_lines.function_line() { + if let Some(src_func_line_number) = hunk_lines.line_number() { + graph_elems.add_edge( + edge_color, + dest_func_call.line_number().to_owned() as usize, + src_func_name, + dest_func_call.function_name(), + &source_file_name, + &possible_file_rel, + edge_color, + "", + src_func_line_number, + dest_func_def_line); + } } } } } - } - } - } - - } else { - // get file func defs - for possible_file_path in possible_diff_file_paths { - if let Some(func_defs) = diff_graph.all_file_func_defs() - .functions_in_file(possible_file_path) { - let possible_file_name; - let possible_file_name_opt = absolute_to_relative_path(possible_file_path, review); - if possible_file_name_opt.is_none() { - possible_file_name = possible_file_path.to_string(); - } else { - possible_file_name = possible_file_name_opt.expect("Empty possible_file_name_opt"); - } - - for dest_func_def in func_defs.functions() { - if dest_func_def.name().contains(dest_func_call.function_name()) { - // TODO - add edge - if let Some(file_func_map) = diff_graph.all_file_func_defs().functions_in_file(source_filepath) { - if let Some(src_func_def) = file_func_map.funcs_for_func_call(dest_func_call) { - // TODO - recheck colors logic - graph_elems.add_edge( - edge_color, - dest_func_call.line_number().to_owned() as usize, - src_func_def.name(), - dest_func_call.function_name(), - &source_file_name, - &possible_file_name, - edge_color, - "", - src_func_def.line_start(), - dest_func_def.line_start()); + } + } else { + // search all files + // TODO - see if git checkout is needed + let possible_file_pathbufs: Vec<&PathBuf> = base_filepaths.iter() + .filter(|file_path| + file_path.to_string_lossy().contains(import_filepath.get_matching_import().possible_file_path())).collect(); + if !possible_file_pathbufs.is_empty() { + for possible_file_pathbuf in possible_file_pathbufs { + let possible_file_path: String = possible_file_pathbuf.to_string_lossy().to_string(); + // search only for func def with specific name + // if something comes up, add edge! + if let Some(func_defs) = funcdef_identifier.function_defs_in_file( + possible_file_pathbuf, lang, dest_func_call.function_name()).await { + if let Some(dest_func_def_line) = func_defs.get_function_line_number() { + if let Some(src_func_name) = hunk_lines.function_line() { + if let Some(src_func_line_number) = hunk_lines.line_number() { + if let Some(possible_file_rel) = + absolute_to_relative_path(&possible_file_path, review) { + graph_elems.add_edge( + edge_color, + dest_func_call.line_number().to_owned() as usize, + src_func_name, + dest_func_call.function_name(), + &source_file_name, + &possible_file_rel, + edge_color, + "", + src_func_line_number, + &dest_func_def_line); + } + } + } } } } } } - } - } + } + } } } } // get import and path } -async fn process_func_defs(review: &Review, +async fn process_func_defs(review: &Review, funcdef_identifier: &mut FunctionDefIdentifier, diff_graph: &DiffGraph, func_call_identifier: &mut FunctionCallIdentifier, lang: &str, graph_elems: &mut MermaidGraphElements, edge_color: &str) { - for (dest_filename, diff_func_defs) in diff_graph.diff_func_defs() { - let mut dest_file_rel = dest_filename.to_string(); - if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { - dest_file_rel = dest_file_relative_path; - } + for (dest_filename, dest_file_hunks) in diff_graph.hunk_diff_map().file_line_map() { let func_defs; if edge_color == "red" { - func_defs = diff_func_defs.deleted_func_defs(); + func_defs = dest_file_hunks.deleted_hunks(); } else { - func_defs = diff_func_defs.added_func_defs(); + func_defs = dest_file_hunks.added_hunks(); } for dest_func in func_defs { - // filter files with ripgrep - if let Some(possible_filepaths) = function_calls_search(review, dest_func.func_def().name()) { - if possible_filepaths.is_empty() { - log::debug!("[incoming_edges] No files detected having function call"); - continue; - } - for possible_filepath in possible_filepaths { - if possible_filepath == *dest_filename { + if let Some(dest_func_name) = dest_func.function_line() { + if let Some(dest_funcdef_line) = dest_func.line_number() { + if let Some(possible_filepaths) = + function_calls_search(review, dest_func_name) + { + if possible_filepaths.is_empty() { + log::debug!("[incoming_edges] No files detected having function call"); continue; } - let possible_path = Path::new(&possible_filepath); - let possible_pathbuf = possible_path.to_path_buf(); - // get func call - if let Some(func_calls) = func_call_identifier.functions_in_file(&possible_pathbuf, lang).await { - // get func def - if let Some(func_map) = generate_function_map(&vec![possible_pathbuf]).await { + for possible_filepath in possible_filepaths { + if possible_filepath == *dest_filename { + continue; + } + let possible_path = Path::new(&possible_filepath); + let possible_pathbuf = possible_path.to_path_buf(); + // get func call + if let Some(func_calls) = func_call_identifier.functions_in_file(&possible_pathbuf, lang).await { + // get func def for func_call in func_calls.function_calls() { - if let Some(func_file_map) = func_map.functions_in_file(&possible_filepath) { - // find correct func def - if let Some(src_func_def) = func_file_map.funcs_for_func_call(func_call) { - if let Some(source_filename) = absolute_to_relative_path(&possible_filepath, review) { - // add edge - - graph_elems.add_edge(edge_color, - func_call.line_number().to_owned() as usize, - func_call.function_name(), - dest_func.func_def().name(), - &source_filename, - &dest_file_rel, - "", - edge_color, - src_func_def.line_start(), - dest_func.func_def().line_start()); + if let Some(src_func_def) = get_function_def_for_func_call( + &possible_pathbuf, func_call.line_number().to_owned() as usize + ).await { + if let Some(source_filename) = absolute_to_relative_path(&possible_filepath, review) { + // add edge + let mut dest_file_rel = dest_filename.to_string(); + if let Some(dest_file_relative_path) = absolute_to_relative_path(&dest_filename, review) { + dest_file_rel = dest_file_relative_path; } + graph_elems.add_edge(edge_color, + func_call.line_number().to_owned() as usize, + src_func_def.name(), + dest_func_name, + &source_filename, + &dest_file_rel, + "", + edge_color, + src_func_def.line_start(), + dest_funcdef_line); } } } @@ -555,5 +436,6 @@ async fn process_func_defs(review: &Review, } } } - } + } +} } \ No newline at end of file diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs index fc4aa795..63f085c5 100644 --- a/vibi-dpu/src/graph/graph_info.rs +++ b/vibi-dpu/src/graph/graph_info.rs @@ -1,6 +1,6 @@ use std::{collections::HashMap, path::PathBuf}; use crate::{graph::function_line_range::generate_function_map, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; -use super::{function_call::{FunctionCallChunk, FunctionCallIdentifier, FunctionCallsOutput}, function_line_range::{AllFileFunctions, HunkFuncDef}, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::source_diff_files}; +use super::{function_call::{FunctionCallChunk, FunctionCallIdentifier, FunctionCallsOutput}, function_line_range::{AllFileFunctions, HunkFuncDef}, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::{numbered_content, read_file, source_diff_files}}; #[derive(Debug, Default, Clone)] pub struct DiffFuncDefs { @@ -89,37 +89,36 @@ impl DiffFuncCall { #[derive(Debug, Default, Clone)] pub struct DiffGraph { - diff_files_func_defs: AllFileFunctions, - // diff_files_imports: FilesImportInfo, - diff_files_func_calls: HashMap, - diff_func_defs: HashMap, - diff_func_calls: HashMap, + hunk_diff_map: HunkDiffMap } impl DiffGraph { - pub fn add_func_def(&mut self, filename: String, diff_func_defs: DiffFuncDefs) { - self.diff_func_defs.insert(filename, diff_func_defs); - } - - pub fn add_diff_func_calls(&mut self, filename: String, diff_func_calls: DiffFuncCall) { - self.diff_func_calls.insert(filename, diff_func_calls); + pub fn hunk_diff_map(&self) -> &HunkDiffMap { + &self.hunk_diff_map } + // pub fn add_func_def(&mut self, filename: String, diff_func_defs: DiffFuncDefs) { + // self.diff_func_defs.insert(filename, diff_func_defs); + // } - pub fn all_file_func_defs(&self) -> &AllFileFunctions { - &self.diff_files_func_defs - } + // pub fn add_diff_func_calls(&mut self, filename: String, diff_func_calls: DiffFuncCall) { + // self.diff_func_calls.insert(filename, diff_func_calls); + // } - // pub fn all_file_imports(&self) -> &FilesImportInfo { - // &self.diff_files_imports + // pub fn all_file_func_defs(&self) -> &AllFileFunctions { + // &self.diff_files_func_defs // } - pub fn diff_func_defs(&self) -> &HashMap { - &self.diff_func_defs - } + // // pub fn all_file_imports(&self) -> &FilesImportInfo { + // // &self.diff_files_imports + // // } - pub fn diff_func_calls(&self) -> &HashMap { - &self.diff_func_calls - } + // pub fn diff_func_defs(&self) -> &HashMap { + // &self.diff_func_defs + // } + + // pub fn diff_func_calls(&self) -> &HashMap { + // &self.diff_func_calls + // } // pub fn func_calls_for_func(&self, function_name: &str, filename: &str) -> Option<&FuncCall> { // if let Some(func_call_map) = self.diff_files_func_calls.get(filename) { @@ -138,85 +137,87 @@ pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> return None; } let diff_code_files = diff_code_files_opt.expect("Empty diff_code_files_opt"); - let hunk_diff_map = get_changed_hunk_lines(&diff_code_files, review); + let mut hunk_diff_map = get_changed_hunk_lines(&diff_code_files, review); // get func defs for base commit for files in diff log::debug!("[generate_diff_graph] hunk diff map =======~~~~~~~~ {:#?}", &hunk_diff_map); - let diff_graph_opt = process_hunk_diff(&hunk_diff_map, review).await; + let diff_graph_opt = process_hunk_diff(&mut hunk_diff_map, review).await; return diff_graph_opt; } -async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, review: &Review) -> Option { +async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review) -> Option { // full graph func def and import info for diff selected files is required. - let all_diff_files = hunk_diff_map.all_files_pathbuf(review.clone_dir()); - // do generate function defs , only starting line - let base_commit_func_defs_opt = generate_function_map(&all_diff_files).await; - if base_commit_func_defs_opt.is_none() { - log::debug!("[process_hunk_diff] Unable to generate func defs for base commit"); - return None; - } - let base_commit_func_defs = base_commit_func_defs_opt.expect("Empty let base_commit_func_defs_opt"); - let base_func_calls_opt = diff_file_func_calls(&all_diff_files, hunk_diff_map, false).await; - if base_func_calls_opt.is_none() { - log::debug!("[process_hunk_diff] Unable to calculate diff_file_func_calls"); - return None; - } - let base_func_calls = base_func_calls_opt.expect("Empty base_func_calls_opt"); - git_checkout_commit(review, &review.pr_head_commit()); - let diff_func_defs_opt = generate_function_map(&all_diff_files).await; - // let diff_imports_opt = get_import_lines(&all_diff_files).await; - // TODO FIXME - opt logic - if diff_func_defs_opt.is_none() { - log::debug!("[process_hunk_diff] Unable to generate func definitions diff map"); - return None; - } - let diff_files_func_defs = diff_func_defs_opt.expect("Empty all_file_func_defs_opt)"); - let diff_files_func_calls_opt = diff_file_func_calls(&all_diff_files, hunk_diff_map, true).await; - if diff_files_func_calls_opt.is_none() { - log::debug!("[process_hunk_diff] Unable to calculate diff_file_func_calls"); - return None; - } - let diff_files_func_calls = diff_files_func_calls_opt.expect("Empty diff_files_func_calls_opt"); - let mut diff_graph = DiffGraph { - diff_files_func_calls: diff_files_func_calls.clone(), - diff_files_func_defs, - // diff_files_imports, - diff_func_defs: HashMap::new(), - diff_func_calls: HashMap::new(), + git_checkout_commit(review, review.pr_head_commit()); + set_func_line_numbers(hunk_diff_map, true); + git_checkout_commit(review, review.base_head_commit()); + set_func_line_numbers(hunk_diff_map, false); + let diff_graph = DiffGraph { + hunk_diff_map: hunk_diff_map.to_owned() }; - for filepath in &all_diff_files { - let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); - let mut diff_func_defs = DiffFuncDefs { - added_func_defs: Vec::new(), deleted_func_defs: Vec::new()}; - // define base and diff func calls output for this filename - if let Some(base_func_call) = base_func_calls.get(filename) { - if let Some(diff_func_call) = diff_files_func_calls.get(filename) { - // initialize and add DiffFuncCall to diff_func_calls_map - let func_calls = DiffFuncCall { - added_calls: diff_func_call.to_owned(), deleted_calls: base_func_call.to_owned()}; - diff_graph.add_diff_func_calls(filename.to_string(), func_calls); - } - }; - if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { - for hunk_diff in file_line_map.added_hunks() { - if let Some(funcs_map) = diff_graph.all_file_func_defs().functions_in_file(filename) { - // find func_defs for files in hunks - let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); - if !funcs_def_vec.is_empty() { - // add func def vec to something with file as key - diff_func_defs.extend_added_funcdefs(funcs_def_vec); - } - } - } - for hunk_diff in file_line_map.deleted_hunks() { - if let Some(funcs_map) = base_commit_func_defs.functions_in_file(filename) { - // find func_defs for files in hunks - let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); - if !funcs_def_vec.is_empty() { - // add func def vec to something with file as key - diff_func_defs.extend_deleted_funcdefs(funcs_def_vec); - } - } - } + return Some(diff_graph); + // let all_diff_files = hunk_diff_map.all_files_pathbuf(review.clone_dir()); + // // do generate function defs , only starting line + // let base_commit_func_defs_opt = generate_function_map(&all_diff_files).await; + // if base_commit_func_defs_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to generate func defs for base commit"); + // return None; + // } + // let base_commit_func_defs = base_commit_func_defs_opt.expect("Empty let base_commit_func_defs_opt"); + // let base_func_calls_opt = diff_file_func_calls(&all_diff_files, hunk_diff_map, false).await; + // if base_func_calls_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to calculate diff_file_func_calls"); + // return None; + // } + // let base_func_calls = base_func_calls_opt.expect("Empty base_func_calls_opt"); + // git_checkout_commit(review, &review.pr_head_commit()); + // let diff_func_defs_opt = generate_function_map(&all_diff_files).await; + // // let diff_imports_opt = get_import_lines(&all_diff_files).await; + // // TODO FIXME - opt logic + // if diff_func_defs_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to generate func definitions diff map"); + // return None; + // } + // let diff_files_func_defs = diff_func_defs_opt.expect("Empty all_file_func_defs_opt)"); + // let diff_files_func_calls_opt = diff_file_func_calls(&all_diff_files, hunk_diff_map, true).await; + // if diff_files_func_calls_opt.is_none() { + // log::debug!("[process_hunk_diff] Unable to calculate diff_file_func_calls"); + // return None; + // } + // let diff_files_func_calls = diff_files_func_calls_opt.expect("Empty diff_files_func_calls_opt"); + + // for filepath in &all_diff_files { + // let filename = filepath.to_str().expect("Unable to deserialize pathbuf"); + // let mut diff_func_defs = DiffFuncDefs { + // added_func_defs: Vec::new(), deleted_func_defs: Vec::new()}; + // // define base and diff func calls output for this filename + // if let Some(base_func_call) = base_func_calls.get(filename) { + // if let Some(diff_func_call) = diff_files_func_calls.get(filename) { + // // initialize and add DiffFuncCall to diff_func_calls_map + // let func_calls = DiffFuncCall { + // added_calls: diff_func_call.to_owned(), deleted_calls: base_func_call.to_owned()}; + // diff_graph.add_diff_func_calls(filename.to_string(), func_calls); + // } + // }; + // if let Some(file_line_map) = hunk_diff_map.file_hunks(filename) { + // for hunk_diff in file_line_map.added_hunks() { + // if let Some(funcs_map) = diff_graph.all_file_func_defs().functions_in_file(filename) { + // // find func_defs for files in hunks + // let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); + // if !funcs_def_vec.is_empty() { + // // add func def vec to something with file as key + // diff_func_defs.extend_added_funcdefs(funcs_def_vec); + // } + // } + // } + // for hunk_diff in file_line_map.deleted_hunks() { + // if let Some(funcs_map) = base_commit_func_defs.functions_in_file(filename) { + // // find func_defs for files in hunks + // let funcs_def_vec = funcs_map.funcs_in_hunk(hunk_diff); + // if !funcs_def_vec.is_empty() { + // // add func def vec to something with file as key + // diff_func_defs.extend_deleted_funcdefs(funcs_def_vec); + // } + // } + // } // TODO FIXME - why no deleted func calls, and how is only diff part sent to find func calls? // find func call in hunks for each import // want to record not all func_calls but hunk specific line numbers @@ -244,8 +245,8 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, review: &Review) -> Opti // // Use full graph's import info // do a git checkout to base commit // do the same thing as done for added_calls - } - diff_graph.add_func_def(filename.to_string(), diff_func_defs); + // } + // diff_graph.add_func_def(filename.to_string(), diff_func_defs); // diff_func_calls_map.insert(filename.to_string(), diff_func_calls_add); } // git_checkout_commit(review, &review.base_head_commit()); @@ -273,39 +274,66 @@ async fn process_hunk_diff(hunk_diff_map: &HunkDiffMap, review: &Review) -> Opti // // for (filename, diff_func_call) in diff_func_calls_map.iter() { // diff_graph.add_diff_func_calls(filename.to_owned(), diff_func_call.to_owned()); // } - return Some(diff_graph); -} +// return Some(diff_graph); +// } -async fn diff_file_func_calls(all_diff_files: &Vec, hunk_diff_map: &HunkDiffMap, added: bool) -> Option> { - // func calls made in diff hunks for all diff files - let mut func_call_file_map = HashMap::new(); - let func_call_identifier_opt = FunctionCallIdentifier::new(); - if func_call_identifier_opt.is_none() { - log::error!("[diff_file_func_calls] Unable to create FunctionCallIdentifier"); - return None; - } - let mut func_call_identifier = func_call_identifier_opt.expect("Empty func_call_identifier_opt"); - for filepathbuf in all_diff_files { - let filepath = filepathbuf.to_str().expect("Unable to deserialize pathbuf"); - let hunk_diffs_opt = hunk_diff_map.file_hunks(filepath); - if hunk_diffs_opt.is_none() { - log::debug!("[diff_file_func_calls] No entry in hunk_diff_map for {}", filepath); - continue; - } - let hunk_diffs = hunk_diffs_opt.expect("Empty hunk_diffs_opt"); +// async fn diff_file_func_calls(all_diff_files: &Vec, hunk_diff_map: &HunkDiffMap, added: bool) -> Option>> { +// // func calls made in diff hunks for all diff files +// let mut func_call_file_map = HashMap::new(); +// let func_call_identifier_opt = FunctionCallIdentifier::new(); +// if func_call_identifier_opt.is_none() { +// log::error!("[diff_file_func_calls] Unable to create FunctionCallIdentifier"); +// return None; +// } +// let mut func_call_identifier = func_call_identifier_opt.expect("Empty func_call_identifier_opt"); +// for filepathbuf in all_diff_files { +// let filepath = filepathbuf.to_str().expect("Unable to deserialize pathbuf"); +// let hunk_diffs_opt = hunk_diff_map.file_hunks(filepath); +// if hunk_diffs_opt.is_none() { +// log::debug!("[diff_file_func_calls] No entry in hunk_diff_map for {}", filepath); +// continue; +// } +// let hunk_diffs = hunk_diffs_opt.expect("Empty hunk_diffs_opt"); +// let file_hunks; +// if added { +// file_hunks = hunk_diffs.added_hunks(); +// } else { +// file_hunks = hunk_diffs.deleted_hunks(); +// } +// let func_calls_opt = func_call_identifier.function_calls_in_hunks(filepathbuf, "rust", file_hunks).await; +// if func_calls_opt.is_none() { +// log::debug!("[diff_file_func_calls] No function calls in hunks: {}, {:?}", filepath, hunk_diffs); +// continue; +// } +// let func_calls = func_calls_opt.expect("Empty func_calls_opt"); +// func_call_file_map.insert(filepath.to_string(), func_calls); +// } +// return Some(func_call_file_map); +// } + +fn set_func_line_numbers(hunk_diff_map: &mut HunkDiffMap, added: bool) -> &mut HunkDiffMap { + for (filepath, file_func_diff) in hunk_diff_map.file_line_map_mut() { let file_hunks; if added { - file_hunks = hunk_diffs.added_hunks(); + file_hunks = file_func_diff.added_hunks_mut(); } else { - file_hunks = hunk_diffs.deleted_hunks(); + file_hunks = file_func_diff.deleted_hunks_mut(); } - let func_calls_opt = func_call_identifier.function_calls_in_hunks(filepathbuf, "rust", file_hunks).await; - if func_calls_opt.is_none() { - log::debug!("[diff_file_func_calls] No function calls in hunks: {}, {:?}", filepath, hunk_diffs); - continue; + for file_hunk in file_hunks { + if let Some(func_line_raw) = file_hunk.function_line() { + // get line number + if let Some(file_contents) = read_file(filepath) { + let line_number_opt = file_contents + .lines() // Split into lines + .enumerate() // Get (index, line) + .position(|(_, line)| line == func_line_raw) // Find the position where the line matches + .map(|index| index + 1); // Convert 0-based index to 1-based line number + + file_hunk.set_line_number(line_number_opt); + } + // get function name from llm + } } - let func_calls = func_calls_opt.expect("Empty func_calls_opt"); - func_call_file_map.insert(filepath.to_string(), func_calls); } - return Some(func_call_file_map); -} \ No newline at end of file + return hunk_diff_map; +} \ No newline at end of file diff --git a/vibi-dpu/src/utils/gitops.rs b/vibi-dpu/src/utils/gitops.rs index f6bcdf7e..4fc05f51 100644 --- a/vibi-dpu/src/utils/gitops.rs +++ b/vibi-dpu/src/utils/gitops.rs @@ -251,6 +251,7 @@ pub fn generate_diff(review: &Review, smallfiles: &Vec) -> HashMap Date: Sat, 12 Oct 2024 01:59:13 +0530 Subject: [PATCH 37/43] fix func def schema description and parsing func line from git diff --- vibi-dpu/src/graph/function_line_range.rs | 21 +++++++++++++++++++-- vibi-dpu/src/graph/gitops.rs | 15 +++++++-------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/vibi-dpu/src/graph/function_line_range.rs b/vibi-dpu/src/graph/function_line_range.rs index e2aefc45..cdffc5fc 100644 --- a/vibi-dpu/src/graph/function_line_range.rs +++ b/vibi-dpu/src/graph/function_line_range.rs @@ -80,11 +80,28 @@ struct InputSchema { function_name: String, } +#[derive(Serialize, Deserialize, Debug)] +struct InputSchemaDescription { + code_chunk: String, + language: String, + function_name: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct OutputSchemaDescription { + function_definition: FunctionDefinitionDescription, + notes: String, +} +#[derive(Serialize, Deserialize, Debug, Default, Clone)] +struct FunctionDefinitionDescription { + line_number: String, +} + // Struct for instructions that hold input/output schemas #[derive(Serialize, Deserialize, Debug)] struct Instructions { - input_schema: InputSchema, - output_schema: FunctionDefOutput, + input_schema: InputSchemaDescription, + output_schema: OutputSchemaDescription, task_description: String, } // Struct for the entire JSON prompt diff --git a/vibi-dpu/src/graph/gitops.rs b/vibi-dpu/src/graph/gitops.rs index 920673b8..d65bca6e 100644 --- a/vibi-dpu/src/graph/gitops.rs +++ b/vibi-dpu/src/graph/gitops.rs @@ -118,7 +118,7 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu for item in diff_files { let filepath = item.filepath.as_str(); let commit_range = format!("{}...{}", prev_commit, curr_commit); - log::debug!("[extract_hunks] | clone_dir = {:?}, filepath = {:?}", clone_dir, filepath); + log::debug!("[get_changed_hunk_lines] | clone_dir = {:?}, filepath = {:?}", clone_dir, filepath); let output_res = Command::new("git") .arg("diff") @@ -131,7 +131,7 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu if output_res.is_err() { let commanderr = output_res.expect_err("No error in output_res"); - log::error!("[extract_hunks] git diff command failed to start : {:?}", commanderr); + log::error!("[get_changed_hunk_lines] git diff command failed to start : {:?}", commanderr); continue; } @@ -141,12 +141,12 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu if diffstr_res.is_err() { let e = diffstr_res.expect_err("No error in diffstr_res"); - log::error!("[extract_hunks] Unable to deserialize diff: {:?}", e); + log::error!("[get_changed_hunk_lines] Unable to deserialize diff: {:?}", e); continue; } let diffstr = diffstr_res.expect("Uncaught error in diffstr_res"); - log::debug!("[extract_hunks] diffstr = {}", &diffstr); + log::debug!("[get_changed_hunk_lines] diffstr = {}", &diffstr); let mut current_add_start = 0; let mut current_del_start = 0; @@ -187,13 +187,12 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu in_del_hunk = false; // Extract the function name or any string after the last @@ - let parts: Vec<&str> = line.split_whitespace().collect(); - if parts.len() > 2 { - function_line = Some(parts[2].to_string()); // Store the function line here + if let Some(pos) = line.rfind("@@ ") { + function_line = Some(line[(pos+3)..].to_string()); } else { function_line = None; // Reset if no valid function line found } - + let parts: Vec<&str> = line.split_whitespace().collect(); // Determine the start and end lines for the hunks let del_hunk = parts.get(1); let add_hunk = parts.get(2); From 99bc4267b622e0bd8a31ea4ab07cf94383431999 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sat, 12 Oct 2024 02:21:48 +0530 Subject: [PATCH 38/43] fix function line matching --- vibi-dpu/src/graph/graph_info.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs index 63f085c5..274e3ef8 100644 --- a/vibi-dpu/src/graph/graph_info.rs +++ b/vibi-dpu/src/graph/graph_info.rs @@ -326,7 +326,7 @@ fn set_func_line_numbers(hunk_diff_map: &mut HunkDiffMap, added: bool) -> &mut H let line_number_opt = file_contents .lines() // Split into lines .enumerate() // Get (index, line) - .position(|(_, line)| line == func_line_raw) // Find the position where the line matches + .position(|(_, line)| line.contains(func_line_raw)) // Find the position where the line matches .map(|index| index + 1); // Convert 0-based index to 1-based line number file_hunk.set_line_number(line_number_opt); From 691fb566220726cf1be45ac6ff83e5967e9964ad Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sat, 12 Oct 2024 05:34:48 +0530 Subject: [PATCH 39/43] Implement getting function name from llm --- vibi-dpu/src/graph/function_name.rs | 96 ++++++++++++++++++++++++++ vibi-dpu/src/graph/gitops.rs | 19 +++-- vibi-dpu/src/graph/graph_edges.rs | 2 +- vibi-dpu/src/graph/graph_info.rs | 28 +++++--- vibi-dpu/src/graph/mermaid_elements.rs | 4 +- vibi-dpu/src/graph/mod.rs | 3 +- 6 files changed, 133 insertions(+), 19 deletions(-) create mode 100644 vibi-dpu/src/graph/function_name.rs diff --git a/vibi-dpu/src/graph/function_name.rs b/vibi-dpu/src/graph/function_name.rs new file mode 100644 index 00000000..8846b7aa --- /dev/null +++ b/vibi-dpu/src/graph/function_name.rs @@ -0,0 +1,96 @@ +use serde::{Deserialize, Serialize}; +use super::utils::{call_llm_api, read_file}; + +// Struct to represent the output schema +#[derive(Serialize, Deserialize, Debug)] +pub struct FunctionNameOutput { + function_name: String, + notes: Option, +} + +impl FunctionNameOutput { + pub fn get_function_name(&self) -> &String { + &self.function_name + } +} + +// Struct to represent the input schema +#[derive(Serialize, Deserialize, Debug)] +struct InputSchema { + code_line: String, + language: String, +} + +// Struct for instructions that hold input/output schemas +#[derive(Serialize, Deserialize, Debug)] +struct Instructions { + input_schema: InputSchema, + output_schema: FunctionNameOutput, + task_description: String, +} +// Struct for the entire JSON prompt +#[derive(Serialize, Deserialize, Debug)] +struct FunctionNamePrompt { + instructions: Instructions, + sample_input: InputSchema, + expected_output: FunctionNameOutput, + input: Option, +} + +impl FunctionNamePrompt { + fn set_input(&mut self, input: InputSchema) { + self.input = Some(input); + } +} + +pub struct FunctionNameIdentifier { + prompt: FunctionNamePrompt +} + +impl FunctionNameIdentifier { + pub fn new() -> Option { + let system_prompt_opt = read_file("/app/prompts/prompt_function_name"); + if system_prompt_opt.is_none() { + log::error!("[FunctionNameIdentifier/new] Unable to read prompt_function_name"); + return None; + } + let system_prompt_lines = system_prompt_opt.expect("Empty system_prompt"); + let prompt_json_res = serde_json::from_str(&system_prompt_lines); + if prompt_json_res.is_err() { + log::error!("[FunctionNameIdentifier/new] Unable to deserialize prompt_json: {:?}", + prompt_json_res.expect("Empty prompt_json_res")); + return None; + } + let prompt_json: FunctionNamePrompt = prompt_json_res.expect("Empty error in prompt_json_res"); + return Some(Self { prompt: prompt_json}); + } + + pub async fn function_name_in_line(&mut self, code_line: &str, lang: &str) -> Option { + // concatenate functioncallsoutput for all chunks + let input = InputSchema{ code_line: code_line.to_string(), language: lang.to_string() }; + self.prompt.input = Some(input); + let prompt_str_res = serde_json::to_string(&self.prompt); + if prompt_str_res.is_err() { + log::error!( + "[FunctionNameIdentifier/function_name_in_line] Unable to serialize prompt: {:?}", + prompt_str_res.expect_err("Empty error in prompt_str_res")); + return None; + } + let prompt_str = prompt_str_res.expect("Uncaught error in prompt_str_res"); + let final_prompt = format!("{}\nOutput - ", &prompt_str); + let prompt_response_opt = call_llm_api(final_prompt).await; + if prompt_response_opt.is_none() { + log::error!("[FunctionNameIdentifier/function_name_in_line] Unable to call llm for code line: {:?}", code_line); + return None; + } + let prompt_response = prompt_response_opt.expect("Empty prompt_response_opt"); + let deserialized_response = serde_json::from_str(&prompt_response); + if deserialized_response.is_err() { + let e = deserialized_response.expect_err("Empty error in deserialized_response"); + log::error!("[FunctionNameIdentifier/function_name_in_line] Error in deserializing response: {:?}", e); + return None; + } + let func_calls: FunctionNameOutput = deserialized_response.expect("Empty error in deserialized_response"); + return Some(func_calls); + } +} \ No newline at end of file diff --git a/vibi-dpu/src/graph/gitops.rs b/vibi-dpu/src/graph/gitops.rs index d65bca6e..55203021 100644 --- a/vibi-dpu/src/graph/gitops.rs +++ b/vibi-dpu/src/graph/gitops.rs @@ -7,7 +7,8 @@ pub struct HunkDiffLines { start_line: usize, end_line: usize, function_line: Option, - line_number: Option + line_number: Option, + function_name: Option } impl HunkDiffLines { @@ -30,6 +31,10 @@ impl HunkDiffLines { pub fn set_line_number(&mut self, line_number: Option) { self.line_number = line_number; } + + pub fn set_function_name(&mut self, function_name: String) { + self.function_name = Some(function_name); + } } #[derive(Debug, Default, Clone)] @@ -170,7 +175,8 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu start_line: current_add_start, end_line: current_add_end, function_line: function_line.clone(), // Use the function line stored - line_number: None + line_number: None, + function_name: None }); } if in_del_hunk { @@ -178,7 +184,8 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu start_line: current_del_start, end_line: current_del_end, function_line: function_line.clone(), // Use the function line stored - line_number: None + line_number: None, + function_name: None }); } @@ -225,7 +232,8 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu start_line: current_add_start, end_line: current_add_end, function_line: function_line.clone(), // Use the function line stored - line_number: None + line_number: None, + function_name: None }); } if in_del_hunk { @@ -233,7 +241,8 @@ pub fn get_changed_hunk_lines(diff_files: &Vec, review: &Review) -> Hu start_line: current_del_start, end_line: current_del_end, function_line: function_line.clone(), // Use the function line stored - line_number: None + line_number: None, + function_name: None }); } diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index 99cfddd2..bb83d786 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -194,7 +194,7 @@ async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &m return; } let mut funcdef_identifier = func_def_identifier_opt.expect("Empty func_def_identifier_opt"); - let mut func_call_identifier_opt = FunctionCallIdentifier::new(); + let func_call_identifier_opt = FunctionCallIdentifier::new(); if func_call_identifier_opt.is_none() { log::error!("[incoming_edges] Unable to create new FunctionCallIdentifier"); return; diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs index 274e3ef8..e67dfc21 100644 --- a/vibi-dpu/src/graph/graph_info.rs +++ b/vibi-dpu/src/graph/graph_info.rs @@ -1,6 +1,6 @@ use std::{collections::HashMap, path::PathBuf}; use crate::{graph::function_line_range::generate_function_map, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; -use super::{function_call::{FunctionCallChunk, FunctionCallIdentifier, FunctionCallsOutput}, function_line_range::{AllFileFunctions, HunkFuncDef}, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::{numbered_content, read_file, source_diff_files}}; +use super::{function_call::{FunctionCallChunk, FunctionCallIdentifier, FunctionCallsOutput}, function_line_range::{AllFileFunctions, HunkFuncDef}, function_name::FunctionNameIdentifier, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::{numbered_content, read_file, source_diff_files}}; #[derive(Debug, Default, Clone)] pub struct DiffFuncDefs { @@ -130,7 +130,7 @@ impl DiffGraph { // } } -pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> Option { +pub async fn generate_diff_graph(diff_files: &Vec, review: &Review, lang: &str) -> Option { let diff_code_files_opt = source_diff_files(diff_files); if diff_code_files_opt.is_none() { log::debug!("[generate_diff_graph] No relevant source diff files in: {:#?}", diff_files); @@ -140,16 +140,22 @@ pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> let mut hunk_diff_map = get_changed_hunk_lines(&diff_code_files, review); // get func defs for base commit for files in diff log::debug!("[generate_diff_graph] hunk diff map =======~~~~~~~~ {:#?}", &hunk_diff_map); - let diff_graph_opt = process_hunk_diff(&mut hunk_diff_map, review).await; + let diff_graph_opt = process_hunk_diff(&mut hunk_diff_map, review, lang).await; return diff_graph_opt; } -async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review) -> Option { +async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review, lang: &str) -> Option { // full graph func def and import info for diff selected files is required. + let func_name_identifier_opt = FunctionNameIdentifier::new(); + if func_name_identifier_opt.is_none() { + log::error!("[process_hunk_diff] Unable to initialize function name identifier"); + return None; + } + let mut func_name_identifier = func_name_identifier_opt.expect("Empty func_name_identifier_opt"); git_checkout_commit(review, review.pr_head_commit()); - set_func_line_numbers(hunk_diff_map, true); + set_func_def_info(hunk_diff_map, &mut func_name_identifier, lang, true).await; git_checkout_commit(review, review.base_head_commit()); - set_func_line_numbers(hunk_diff_map, false); + set_func_def_info(hunk_diff_map, &mut func_name_identifier, lang, false).await; let diff_graph = DiffGraph { hunk_diff_map: hunk_diff_map.to_owned() }; @@ -311,7 +317,7 @@ async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review) -> // return Some(func_call_file_map); // } -fn set_func_line_numbers(hunk_diff_map: &mut HunkDiffMap, added: bool) -> &mut HunkDiffMap { +async fn set_func_def_info(hunk_diff_map: &mut HunkDiffMap, func_name_identifier: &mut FunctionNameIdentifier, lang: &str, added: bool) { for (filepath, file_func_diff) in hunk_diff_map.file_line_map_mut() { let file_hunks; if added { @@ -320,20 +326,22 @@ fn set_func_line_numbers(hunk_diff_map: &mut HunkDiffMap, added: bool) -> &mut H file_hunks = file_func_diff.deleted_hunks_mut(); } for file_hunk in file_hunks { - if let Some(func_line_raw) = file_hunk.function_line() { + if let Some(func_line_raw) = file_hunk.function_line().clone() { // get line number if let Some(file_contents) = read_file(filepath) { let line_number_opt = file_contents .lines() // Split into lines .enumerate() // Get (index, line) - .position(|(_, line)| line.contains(func_line_raw)) // Find the position where the line matches + .position(|(_, line)| line.contains(&func_line_raw)) // Find the position where the line matches .map(|index| index + 1); // Convert 0-based index to 1-based line number file_hunk.set_line_number(line_number_opt); + if let Some(func_name) = func_name_identifier.function_name_in_line(&func_line_raw, lang).await { + file_hunk.set_function_name(func_name.get_function_name().to_string()); + } } // get function name from llm } } } - return hunk_diff_map; } \ No newline at end of file diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index 4d9c823a..3ce88c28 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -39,7 +39,8 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review } let base_filepaths = base_filepaths_opt.expect("Empty base_filepaths_opt"); // let base_commit_import_info = get_test_import_info(); - let diff_graph_opt = generate_diff_graph(diff_files, review).await; + let lang = "rust"; + let diff_graph_opt = generate_diff_graph(diff_files, review, lang).await; log::debug!("[generate_flowchart_elements] diff_graph_opt = {:#?}", &diff_graph_opt); if diff_graph_opt.is_none() { log::error!( @@ -59,7 +60,6 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review // } // let head_filepaths = head_filepaths_opt.expect("Empty head_filepaths_opt"); let mut graph_elems = MermaidGraphElements::new(); - let lang = "rust"; graph_edges(&base_filepaths, review, &diff_graph, &mut graph_elems, lang).await; let elems_str = graph_elems.render_elements(review); return Some(elems_str); diff --git a/vibi-dpu/src/graph/mod.rs b/vibi-dpu/src/graph/mod.rs index f36287bc..849da12e 100644 --- a/vibi-dpu/src/graph/mod.rs +++ b/vibi-dpu/src/graph/mod.rs @@ -6,4 +6,5 @@ pub mod function_line_range; pub mod file_imports; pub mod graph_info; pub mod graph_edges; -pub mod function_call; \ No newline at end of file +pub mod function_call; +pub mod function_name; \ No newline at end of file From eb003576736e2d48ec880e163cb43b266adbe8d9 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sat, 12 Oct 2024 07:32:10 +0530 Subject: [PATCH 40/43] cache func name, add language and fix abs path in rg --- vibi-dpu/src/graph/function_call.rs | 13 +-- vibi-dpu/src/graph/function_name.rs | 11 ++- vibi-dpu/src/graph/gitops.rs | 4 + vibi-dpu/src/graph/graph_edges.rs | 56 ++++++++---- vibi-dpu/src/graph/graph_info.rs | 21 ++--- vibi-dpu/src/graph/mermaid_elements.rs | 7 +- vibi-dpu/src/graph/utils.rs | 119 +++++++++++++++++++++++-- 7 files changed, 184 insertions(+), 47 deletions(-) diff --git a/vibi-dpu/src/graph/function_call.rs b/vibi-dpu/src/graph/function_call.rs index 083dbddc..f21fa282 100644 --- a/vibi-dpu/src/graph/function_call.rs +++ b/vibi-dpu/src/graph/function_call.rs @@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize}; use std::io::BufRead; use crate::utils::review::Review; -use super::{gitops::{HunkDiffLines, HunkDiffMap}, utils::{call_llm_api, numbered_content, read_file}}; +use super::{gitops::{HunkDiffLines, HunkDiffMap}, utils::{call_llm_api, detect_language, numbered_content, read_file}}; #[derive(Debug, Serialize, Default, Deserialize, Clone)] pub struct FunctionCallChunk { @@ -345,13 +345,12 @@ impl FunctionCallIdentifier { } -pub fn function_calls_search(review: &Review, function_name: &str) -> Option>{ +pub fn function_calls_search(review: &Review, function_name: &str, lang: &str) -> Option>{ let pattern = format!(r"{}\([^\)]*\)", function_name); // Regex pattern for the specific function call let directory = review.clone_dir(); // The directory to search in (current directory here) - // Spawn the ripgrep process, adding `-l` for filenames and `--absolute-path` for absolute paths + // Spawn the ripgrep process, adding `-l` for filenames let rg_command_res = Command::new("rg") - .arg("--absolute-path") // Print absolute file paths .arg("-l") // Print only filenames that contain matches .arg("-e") // Use regular expression .arg(pattern) // The regex pattern for function calls @@ -374,7 +373,11 @@ pub fn function_calls_search(review: &Review, function_name: &str) -> Option } impl FunctionNameIdentifier { @@ -62,11 +65,14 @@ impl FunctionNameIdentifier { return None; } let prompt_json: FunctionNamePrompt = prompt_json_res.expect("Empty error in prompt_json_res"); - return Some(Self { prompt: prompt_json}); + return Some(Self { prompt: prompt_json, cached_output: HashMap::new()}); } pub async fn function_name_in_line(&mut self, code_line: &str, lang: &str) -> Option { // concatenate functioncallsoutput for all chunks + if let Some(cached_func_name) = self.cached_output.get(code_line) { + return Some(FunctionNameOutput{ function_name: cached_func_name.to_string(), notes: None }) + } let input = InputSchema{ code_line: code_line.to_string(), language: lang.to_string() }; self.prompt.input = Some(input); let prompt_str_res = serde_json::to_string(&self.prompt); @@ -91,6 +97,7 @@ impl FunctionNameIdentifier { return None; } let func_calls: FunctionNameOutput = deserialized_response.expect("Empty error in deserialized_response"); + self.cached_output.insert(code_line.to_string(), func_calls.get_function_name().to_string()); return Some(func_calls); } } \ No newline at end of file diff --git a/vibi-dpu/src/graph/gitops.rs b/vibi-dpu/src/graph/gitops.rs index 55203021..86e8c851 100644 --- a/vibi-dpu/src/graph/gitops.rs +++ b/vibi-dpu/src/graph/gitops.rs @@ -24,6 +24,10 @@ impl HunkDiffLines { &self.function_line } + pub fn function_name(&self) -> &Option { + &self.function_name + } + pub fn line_number(&self) -> &Option { &self.line_number } diff --git a/vibi-dpu/src/graph/graph_edges.rs b/vibi-dpu/src/graph/graph_edges.rs index bb83d786..ea464782 100644 --- a/vibi-dpu/src/graph/graph_edges.rs +++ b/vibi-dpu/src/graph/graph_edges.rs @@ -1,14 +1,14 @@ use std::path::{Path, PathBuf}; use crate::utils::{gitops::git_checkout_commit, review::Review}; -use super::{elements::MermaidGraphElements, file_imports::ImportIdentifier, function_call::{function_calls_search, FunctionCallIdentifier}, function_line_range::{generate_function_map, get_function_def_for_func_call, FunctionDefIdentifier}, graph_info::DiffGraph, utils::absolute_to_relative_path}; +use super::{elements::MermaidGraphElements, file_imports::ImportIdentifier, function_call::{function_calls_search, FunctionCallIdentifier}, function_line_range::{generate_function_map, get_function_def_for_func_call, FunctionDefIdentifier}, graph_info::DiffGraph, utils::{absolute_to_relative_path, detect_language}}; -pub async fn graph_edges(base_filepaths: &Vec, review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, lang: &str) { - outgoing_edges(base_filepaths, diff_graph, graph_elems, review, lang).await; - incoming_edges(review, diff_graph, graph_elems, lang).await; +pub async fn graph_edges(base_filepaths: &Vec, review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { + outgoing_edges(base_filepaths, diff_graph, graph_elems, review).await; + incoming_edges(review, diff_graph, graph_elems).await; } -async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements, lang :&str) { +async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &mut MermaidGraphElements) { // filter files with ripgrep // for each filtered file // get func call @@ -206,7 +206,6 @@ async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &m &mut funcdef_identifier, diff_graph, &mut func_call_identifier, - lang, graph_elems, "green" ).await; @@ -216,7 +215,6 @@ async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &m &mut funcdef_identifier, diff_graph, &mut func_call_identifier, - lang, graph_elems, "red" ).await; @@ -234,7 +232,7 @@ async fn incoming_edges(review: &Review, diff_graph: &DiffGraph, graph_elems: &m // } async fn outgoing_edges(base_filepaths: &Vec, diff_graph: &DiffGraph, - graph_elems: &mut MermaidGraphElements, review: &Review, lang: &str) + graph_elems: &mut MermaidGraphElements, review: &Review) { let func_call_identifier_opt = FunctionCallIdentifier::new(); if func_call_identifier_opt.is_none() { @@ -259,7 +257,6 @@ async fn outgoing_edges(base_filepaths: &Vec, diff_graph: &DiffGraph, &mut import_identifier, &mut func_call_identifier, &mut funcdef_identifier, - lang, review, diff_graph, base_filepaths, @@ -269,7 +266,6 @@ async fn outgoing_edges(base_filepaths: &Vec, diff_graph: &DiffGraph, process_func_calls(&mut import_identifier, &mut func_call_identifier, &mut funcdef_identifier, - lang, review, diff_graph, base_filepaths, @@ -279,7 +275,7 @@ async fn outgoing_edges(base_filepaths: &Vec, diff_graph: &DiffGraph, async fn process_func_calls(import_identifier: &mut ImportIdentifier, func_call_identifier: &mut FunctionCallIdentifier, funcdef_identifier: &mut FunctionDefIdentifier, - lang: &str, review: &Review, diff_graph: &DiffGraph, base_filepaths: &Vec, + review: &Review, diff_graph: &DiffGraph, base_filepaths: &Vec, graph_elems: &mut MermaidGraphElements, edge_color: &str) { for (source_filepath, src_file_hunks) in diff_graph.hunk_diff_map().file_line_map() { @@ -294,14 +290,20 @@ async fn process_func_calls(import_identifier: &mut ImportIdentifier, func_call_ } else { diff_hunks = src_file_hunks.deleted_hunks(); } + let lang_opt = detect_language(source_filepath); + if lang_opt.is_none() { + log::error!("[get_import_path_file] Unable to determine language: {}", source_filepath); + return; + } + let lang = lang_opt.expect("Empty lang_opt"); let source_file_path = Path::new(source_filepath); let source_file_pathbuf = source_file_path.to_path_buf(); if let Some(hunk_func_calls) = func_call_identifier. - function_calls_in_hunks(&source_file_pathbuf, lang, diff_hunks).await { + function_calls_in_hunks(&source_file_pathbuf, &lang, diff_hunks).await { for (hunk_lines, func_call_output) in hunk_func_calls { for dest_func_call in func_call_output.function_calls() { if let Some(import_filepath) = import_identifier.get_import_path_file( - source_filepath, lang, dest_func_call.function_name()).await { + source_filepath, &lang, dest_func_call.function_name()).await { // get file // get diffgraph all files and see if they contain filepath let possible_diff_file_paths: Vec<&String> = diff_graph.hunk_diff_map().all_files().into_iter() @@ -345,7 +347,7 @@ async fn process_func_calls(import_identifier: &mut ImportIdentifier, func_call_ // search only for func def with specific name // if something comes up, add edge! if let Some(func_defs) = funcdef_identifier.function_defs_in_file( - possible_file_pathbuf, lang, dest_func_call.function_name()).await { + possible_file_pathbuf, &lang, dest_func_call.function_name()).await { if let Some(dest_func_def_line) = func_defs.get_function_line_number() { if let Some(src_func_name) = hunk_lines.function_line() { if let Some(src_func_line_number) = hunk_lines.line_number() { @@ -380,9 +382,15 @@ async fn process_func_calls(import_identifier: &mut ImportIdentifier, func_call_ async fn process_func_defs(review: &Review, funcdef_identifier: &mut FunctionDefIdentifier, diff_graph: &DiffGraph, func_call_identifier: &mut FunctionCallIdentifier, - lang: &str, graph_elems: &mut MermaidGraphElements, edge_color: &str) + graph_elems: &mut MermaidGraphElements, edge_color: &str) { for (dest_filename, dest_file_hunks) in diff_graph.hunk_diff_map().file_line_map() { + let dest_lang_opt = detect_language(&dest_filename); + if dest_lang_opt.is_none() { + log::error!("[process_func_defs] Unable to detect language: {}", dest_filename); + continue; + } + let dest_lang = dest_lang_opt.expect("Empty dest_lang_opt"); let func_defs; if edge_color == "red" { func_defs = dest_file_hunks.deleted_hunks(); @@ -390,23 +398,33 @@ async fn process_func_defs(review: &Review, funcdef_identifier: &mut FunctionDef func_defs = dest_file_hunks.added_hunks(); } for dest_func in func_defs { - if let Some(dest_func_name) = dest_func.function_line() { + if let Some(dest_func_name) = dest_func.function_name() { if let Some(dest_funcdef_line) = dest_func.line_number() { if let Some(possible_filepaths) = - function_calls_search(review, dest_func_name) + function_calls_search(review, dest_func_name, &dest_lang) { if possible_filepaths.is_empty() { - log::debug!("[incoming_edges] No files detected having function call"); + log::debug!("[process_func_defs] No files detected having function call"); continue; } for possible_filepath in possible_filepaths { if possible_filepath == *dest_filename { continue; } + let lang_opt = detect_language(&possible_filepath); + if lang_opt.is_none() { + log::debug!("[process_func_defs] Unable to determine language: {}", &possible_filepath); + continue; + } + let lang = lang_opt.expect("Empty lang_opt"); + if lang != dest_lang { + log::debug!("[process_func_defs] Different languages: {}, {}", &lang, &dest_lang); + continue; + } let possible_path = Path::new(&possible_filepath); let possible_pathbuf = possible_path.to_path_buf(); // get func call - if let Some(func_calls) = func_call_identifier.functions_in_file(&possible_pathbuf, lang).await { + if let Some(func_calls) = func_call_identifier.functions_in_file(&possible_pathbuf, &lang).await { // get func def for func_call in func_calls.function_calls() { if let Some(src_func_def) = get_function_def_for_func_call( diff --git a/vibi-dpu/src/graph/graph_info.rs b/vibi-dpu/src/graph/graph_info.rs index e67dfc21..a50520d3 100644 --- a/vibi-dpu/src/graph/graph_info.rs +++ b/vibi-dpu/src/graph/graph_info.rs @@ -1,6 +1,6 @@ use std::{collections::HashMap, path::PathBuf}; use crate::{graph::function_line_range::generate_function_map, utils::{gitops::{git_checkout_commit, StatItem}, review::Review}}; -use super::{function_call::{FunctionCallChunk, FunctionCallIdentifier, FunctionCallsOutput}, function_line_range::{AllFileFunctions, HunkFuncDef}, function_name::FunctionNameIdentifier, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::{numbered_content, read_file, source_diff_files}}; +use super::{function_call::{FunctionCallChunk, FunctionCallIdentifier, FunctionCallsOutput}, function_line_range::{AllFileFunctions, HunkFuncDef}, function_name::FunctionNameIdentifier, gitops::{get_changed_hunk_lines, HunkDiffMap}, utils::{detect_language, numbered_content, read_file, source_diff_files}}; #[derive(Debug, Default, Clone)] pub struct DiffFuncDefs { @@ -130,7 +130,7 @@ impl DiffGraph { // } } -pub async fn generate_diff_graph(diff_files: &Vec, review: &Review, lang: &str) -> Option { +pub async fn generate_diff_graph(diff_files: &Vec, review: &Review) -> Option { let diff_code_files_opt = source_diff_files(diff_files); if diff_code_files_opt.is_none() { log::debug!("[generate_diff_graph] No relevant source diff files in: {:#?}", diff_files); @@ -140,11 +140,11 @@ pub async fn generate_diff_graph(diff_files: &Vec, review: &Review, la let mut hunk_diff_map = get_changed_hunk_lines(&diff_code_files, review); // get func defs for base commit for files in diff log::debug!("[generate_diff_graph] hunk diff map =======~~~~~~~~ {:#?}", &hunk_diff_map); - let diff_graph_opt = process_hunk_diff(&mut hunk_diff_map, review, lang).await; + let diff_graph_opt = process_hunk_diff(&mut hunk_diff_map, review).await; return diff_graph_opt; } -async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review, lang: &str) -> Option { +async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review) -> Option { // full graph func def and import info for diff selected files is required. let func_name_identifier_opt = FunctionNameIdentifier::new(); if func_name_identifier_opt.is_none() { @@ -153,9 +153,9 @@ async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review, lan } let mut func_name_identifier = func_name_identifier_opt.expect("Empty func_name_identifier_opt"); git_checkout_commit(review, review.pr_head_commit()); - set_func_def_info(hunk_diff_map, &mut func_name_identifier, lang, true).await; + set_func_def_info(hunk_diff_map, &mut func_name_identifier, true).await; git_checkout_commit(review, review.base_head_commit()); - set_func_def_info(hunk_diff_map, &mut func_name_identifier, lang, false).await; + set_func_def_info(hunk_diff_map, &mut func_name_identifier, false).await; let diff_graph = DiffGraph { hunk_diff_map: hunk_diff_map.to_owned() }; @@ -317,7 +317,7 @@ async fn process_hunk_diff(hunk_diff_map: &mut HunkDiffMap, review: &Review, lan // return Some(func_call_file_map); // } -async fn set_func_def_info(hunk_diff_map: &mut HunkDiffMap, func_name_identifier: &mut FunctionNameIdentifier, lang: &str, added: bool) { +async fn set_func_def_info(hunk_diff_map: &mut HunkDiffMap, func_name_identifier: &mut FunctionNameIdentifier, added: bool) { for (filepath, file_func_diff) in hunk_diff_map.file_line_map_mut() { let file_hunks; if added { @@ -336,11 +336,12 @@ async fn set_func_def_info(hunk_diff_map: &mut HunkDiffMap, func_name_identifier .map(|index| index + 1); // Convert 0-based index to 1-based line number file_hunk.set_line_number(line_number_opt); - if let Some(func_name) = func_name_identifier.function_name_in_line(&func_line_raw, lang).await { - file_hunk.set_function_name(func_name.get_function_name().to_string()); + if let Some(lang) = detect_language(filepath) { + if let Some(func_name) = func_name_identifier.function_name_in_line(&func_line_raw, &lang).await { + file_hunk.set_function_name(func_name.get_function_name().to_string()); + } } } - // get function name from llm } } } diff --git a/vibi-dpu/src/graph/mermaid_elements.rs b/vibi-dpu/src/graph/mermaid_elements.rs index 3ce88c28..cd287b02 100644 --- a/vibi-dpu/src/graph/mermaid_elements.rs +++ b/vibi-dpu/src/graph/mermaid_elements.rs @@ -31,7 +31,7 @@ pub async fn generate_mermaid_flowchart(diff_files: &Vec, review: &Rev async fn generate_flowchart_elements(diff_files: &Vec, review: &Review) -> Option { // generate full graph for base commit id git_checkout_commit(review, review.base_head_commit()); - let base_filepaths_opt = all_code_files(review.clone_dir()); + let base_filepaths_opt = all_code_files(review.clone_dir(), diff_files); if base_filepaths_opt.is_none() { log::error!( "[generate_flowchart_elements] Unable to get file paths: {}", review.clone_dir()); @@ -39,8 +39,7 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review } let base_filepaths = base_filepaths_opt.expect("Empty base_filepaths_opt"); // let base_commit_import_info = get_test_import_info(); - let lang = "rust"; - let diff_graph_opt = generate_diff_graph(diff_files, review, lang).await; + let diff_graph_opt = generate_diff_graph(diff_files, review).await; log::debug!("[generate_flowchart_elements] diff_graph_opt = {:#?}", &diff_graph_opt); if diff_graph_opt.is_none() { log::error!( @@ -60,7 +59,7 @@ async fn generate_flowchart_elements(diff_files: &Vec, review: &Review // } // let head_filepaths = head_filepaths_opt.expect("Empty head_filepaths_opt"); let mut graph_elems = MermaidGraphElements::new(); - graph_edges(&base_filepaths, review, &diff_graph, &mut graph_elems, lang).await; + graph_edges(&base_filepaths, review, &diff_graph, &mut graph_elems).await; let elems_str = graph_elems.render_elements(review); return Some(elems_str); } \ No newline at end of file diff --git a/vibi-dpu/src/graph/utils.rs b/vibi-dpu/src/graph/utils.rs index 634eddc0..47cdf172 100644 --- a/vibi-dpu/src/graph/utils.rs +++ b/vibi-dpu/src/graph/utils.rs @@ -1,6 +1,5 @@ -use std::{collections::HashMap, path::{Path, PathBuf}, slice::Chunks}; +use std::{collections::{HashMap, HashSet}, path::{Path, PathBuf}}; -use futures_util::StreamExt; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use strsim::jaro_winkler; @@ -115,17 +114,24 @@ pub fn generate_random_string(length: usize) -> String { random_string } -pub fn all_code_files(dir: &str) -> Option> { +pub fn all_code_files(dir: &str, diff_files: &Vec) -> Option> { let mut code_files = Vec::::new(); + let all_diff_langs = detect_langs_diff(diff_files); + if all_diff_langs.is_empty() { + log::error!("[all_code_files] No known language files detected in diff"); + return None; + } for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) { let path = entry.path().to_owned(); log::debug!("[all_code_files] path = {:?}", path); let ext = path.extension().and_then(|ext| ext.to_str()); log::debug!("[all_code_files] extension = {:?}", &ext); - if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { - match path.canonicalize() { - Ok(abs_path) => code_files.push(abs_path), - Err(e) => log::error!("Failed to get absolute path for {:?}: {:?}", path, e), + if let Some(file_lang) = detect_language(&path.to_string_lossy()) { + if all_diff_langs.contains(&file_lang) { + match path.canonicalize() { + Ok(abs_path) => code_files.push(abs_path), + Err(e) => log::error!("Failed to get absolute path for {:?}: {:?}", path, e), + } } } } @@ -135,6 +141,16 @@ pub fn all_code_files(dir: &str) -> Option> { return Some(code_files); } +fn detect_langs_diff(diff_files: &Vec) -> HashSet { + let mut all_diff_langs: HashSet = HashSet::new(); + for diff_file in diff_files { + if let Some(diff_lang) = detect_language(&diff_file.filepath) { + all_diff_langs.insert(diff_lang); + } + } + return all_diff_langs; +} + pub fn match_imported_filename_to_path(paths: &Vec, filename: &str) -> Option { let relative_path = Path::new(filename); // Find the first path that matches the filename or relative path @@ -192,4 +208,93 @@ pub fn absolute_to_relative_path(abs_path: &str, review: &Review) -> Option HashMap<&'static str, &'static str> { + let mut extension_map = HashMap::new(); + + // Common programming languages + extension_map.insert("rs", "Rust"); + extension_map.insert("py", "Python"); + extension_map.insert("js", "JavaScript"); + extension_map.insert("ts", "TypeScript"); + extension_map.insert("java", "Java"); + extension_map.insert("rb", "Ruby"); + extension_map.insert("go", "Go"); + extension_map.insert("cpp", "C++"); + extension_map.insert("cs", "C#"); + extension_map.insert("c", "C"); + extension_map.insert("php", "PHP"); + extension_map.insert("swift", "Swift"); + extension_map.insert("kt", "Kotlin"); + extension_map.insert("m", "Objective-C"); + extension_map.insert("pl", "Perl"); + extension_map.insert("r", "R"); + extension_map.insert("scala", "Scala"); + extension_map.insert("dart", "Dart"); + extension_map.insert("lua", "Lua"); + extension_map.insert("hs", "Haskell"); + extension_map.insert("erl", "Erlang"); + extension_map.insert("ml", "OCaml"); + extension_map.insert("groovy", "Groovy"); + extension_map.insert("sql", "SQL"); + extension_map.insert("v", "V"); + extension_map.insert("nim", "Nim"); + extension_map.insert("elm", "Elm"); + extension_map.insert("jl", "Julia"); + extension_map.insert("cr", "Crystal"); + extension_map.insert("ex", "Elixir"); + extension_map.insert("fs", "F#"); + extension_map.insert("clj", "Clojure"); + extension_map.insert("coffee", "CoffeeScript"); + extension_map.insert("hx", "Haxe"); + extension_map.insert("lisp", "Lisp"); + extension_map.insert("scss", "Sass"); + extension_map.insert("ps1", "PowerShell"); + extension_map.insert("vb", "Visual Basic"); + extension_map.insert("bat", "Batch Script"); + extension_map.insert("matlab", "MATLAB"); + extension_map.insert("vbs", "VBScript"); + extension_map.insert("as", "ActionScript"); + extension_map.insert("rkt", "Racket"); + extension_map.insert("cls", "Apex"); + extension_map.insert("sass", "Sass"); + extension_map.insert("less", "Less"); + + // Web and markup languages + extension_map.insert("html", "HTML"); + extension_map.insert("css", "CSS"); + extension_map.insert("xml", "XML"); + extension_map.insert("md", "Markdown"); + extension_map.insert("adoc", "AsciiDoc"); + extension_map.insert("rst", "reStructuredText"); + + // Frameworks and template languages + extension_map.insert("jsx", "React JSX"); + extension_map.insert("tsx", "TypeScript"); + extension_map.insert("vue", "Vue.js"); + extension_map.insert("erb", "Ruby on Rails Embedded Ruby"); + extension_map.insert("ejs", "Express.js Embedded JavaScript"); + + // Config and data formats + extension_map.insert("json", "JSON"); + extension_map.insert("yaml", "YAML"); + extension_map.insert("toml", "TOML"); + extension_map.insert("ini", "INI Config"); + extension_map.insert("config", "Configuration File"); + + extension_map +} + +// Detect the programming language or framework based on the file extension +pub fn detect_language(file_path: &str) -> Option { + let extension_map = get_extension_map(); + let path = Path::new(file_path); + + // Extract the file extension and match it with the map + path.extension() + .and_then(|ext| ext.to_str()) + .map(|ext| ext.to_lowercase()) // Normalize to lowercase + .and_then(|ext| extension_map.get(ext.as_str()).map(|&lang| lang.to_string())) } \ No newline at end of file From d67a36a689900459344764852bf7ebf5ded2b9e1 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sat, 12 Oct 2024 15:06:28 +0530 Subject: [PATCH 41/43] fix identifying diff files with language --- vibi-dpu/src/graph/utils.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/vibi-dpu/src/graph/utils.rs b/vibi-dpu/src/graph/utils.rs index 47cdf172..12a0c3e8 100644 --- a/vibi-dpu/src/graph/utils.rs +++ b/vibi-dpu/src/graph/utils.rs @@ -167,10 +167,10 @@ pub fn source_diff_files(diff_files: &Vec) -> Option> { let mut code_files = Vec::::new(); for stat_item in diff_files { let filepath_str = &stat_item.filepath; - let filepath = Path::new(filepath_str); - if filepath.extension().and_then(|ext| ext.to_str()) == Some("rs") { + let filepath = Path::new(filepath_str); + if let Some(lang) = detect_language(&filepath_str) { code_files.push(stat_item.clone()); - } + } } if code_files.is_empty() { return None; @@ -272,17 +272,17 @@ fn get_extension_map() -> HashMap<&'static str, &'static str> { // Frameworks and template languages extension_map.insert("jsx", "React JSX"); - extension_map.insert("tsx", "TypeScript"); + extension_map.insert("tsx", "React TypeScript TSX"); extension_map.insert("vue", "Vue.js"); extension_map.insert("erb", "Ruby on Rails Embedded Ruby"); extension_map.insert("ejs", "Express.js Embedded JavaScript"); // Config and data formats - extension_map.insert("json", "JSON"); - extension_map.insert("yaml", "YAML"); - extension_map.insert("toml", "TOML"); - extension_map.insert("ini", "INI Config"); - extension_map.insert("config", "Configuration File"); + // extension_map.insert("json", "JSON"); + // extension_map.insert("yaml", "YAML"); + // extension_map.insert("toml", "TOML"); + // extension_map.insert("ini", "INI Config"); + // extension_map.insert("config", "Configuration File"); extension_map } From 81a289c7267f1835d1f577c9e7c02eda7cb03a15 Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sat, 12 Oct 2024 15:10:24 +0530 Subject: [PATCH 42/43] remove strsim --- vibi-dpu/src/graph/utils.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vibi-dpu/src/graph/utils.rs b/vibi-dpu/src/graph/utils.rs index 12a0c3e8..c558dd24 100644 --- a/vibi-dpu/src/graph/utils.rs +++ b/vibi-dpu/src/graph/utils.rs @@ -2,7 +2,6 @@ use std::{collections::{HashMap, HashSet}, path::{Path, PathBuf}}; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; -use strsim::jaro_winkler; use walkdir::WalkDir; use std::fs; use rand::Rng; @@ -187,15 +186,6 @@ pub fn numbered_content(file_contents: String) -> Vec { return lines; } -pub fn match_overlap(str1: &str, str2: &str, similarity_threshold: f64) -> bool { - let similarity = jaro_winkler(str1, str2); - log::debug!("[match_overlap] str1 = {}, str2 = {}, similarity = {}, similarity_threshold = {}", str1, str2, similarity, similarity_threshold); - if similarity >= similarity_threshold { - return true; - } - return false; -} - pub fn absolute_to_relative_path(abs_path: &str, review: &Review) -> Option { let base_path = review.clone_dir(); let full_path = PathBuf::from(abs_path); From 0ba7fc58ad0bc27475fc4132bfe89969b010f9aa Mon Sep 17 00:00:00 2001 From: Tapish Rathore Date: Sat, 12 Oct 2024 15:13:45 +0530 Subject: [PATCH 43/43] update version and remove strsim --- vibi-dpu/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vibi-dpu/Cargo.toml b/vibi-dpu/Cargo.toml index 2939593d..724dd0b8 100644 --- a/vibi-dpu/Cargo.toml +++ b/vibi-dpu/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "vibi-dpu" -version = "1.0.0" +version = "2.0.0" edition = "2021" authors = ["Tapish Rathore "] license = "GPL-3.0-or-later" @@ -38,5 +38,4 @@ jsonwebtoken = "8.3.0" # MIT fern = "0.6.2" # MIT log = "0.4.20" # MIT/Apache2 walkdir = "2.5.0" # Unlicence/MIT -strsim = "0.11.1" #MIT # todo - check all lib licences