From 9bb95032fb9bbb102301cd8669b15caaf49f0e1f Mon Sep 17 00:00:00 2001 From: Alexander Lyon Date: Wed, 17 Apr 2024 12:45:08 +0100 Subject: [PATCH] create turbo-static for compile time graph analysis --- Cargo.lock | 228 +++++++++++++++---- Cargo.toml | 3 +- crates/turbo-static/.gitignore | 2 + crates/turbo-static/Cargo.toml | 25 ++ crates/turbo-static/readme.md | 29 +++ crates/turbo-static/src/call_resolver.rs | 125 ++++++++++ crates/turbo-static/src/identifier.rs | 99 ++++++++ crates/turbo-static/src/lsp_client.rs | 159 +++++++++++++ crates/turbo-static/src/main.rs | 276 +++++++++++++++++++++++ crates/turbo-static/src/visitor.rs | 211 +++++++++++++++++ 10 files changed, 1114 insertions(+), 43 deletions(-) create mode 100644 crates/turbo-static/.gitignore create mode 100644 crates/turbo-static/Cargo.toml create mode 100644 crates/turbo-static/readme.md create mode 100644 crates/turbo-static/src/call_resolver.rs create mode 100644 crates/turbo-static/src/identifier.rs create mode 100644 crates/turbo-static/src/lsp_client.rs create mode 100644 crates/turbo-static/src/main.rs create mode 100644 crates/turbo-static/src/visitor.rs diff --git a/Cargo.lock b/Cargo.lock index 6a0d9cdd09f14f..b0f0aa7c8409f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1128,9 +1128,9 @@ checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -1344,9 +1344,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.31" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ "android-tzdata", "iana-time-zone", @@ -1354,7 +1354,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-targets 0.48.1", + "windows-targets 0.52.5", ] [[package]] @@ -1905,9 +1905,9 @@ checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403" [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] @@ -1981,11 +1981,10 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "ab3db02a9c5b5121e1e42fbdb1aeb65f5e02624cc58c43f2884c6ccac0b82f95" dependencies = [ - "cfg-if", "crossbeam-utils", ] @@ -2002,15 +2001,11 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ - "autocfg", - "cfg-if", "crossbeam-utils", - "memoffset 0.9.0", - "scopeguard", ] [[package]] @@ -2024,14 +2019,21 @@ dependencies = [ ] [[package]] -name = "crossbeam-utils" -version = "0.8.16" +name = "crossbeam-skiplist" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +checksum = "df29de440c58ca2cc6e587ec3d22347551a32435fbde9d2bff64e78a9ffa151b" dependencies = [ - "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", ] +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + [[package]] name = "crossterm" version = "0.25.0" @@ -2504,6 +2506,12 @@ dependencies = [ "nom", ] +[[package]] +name = "double-ended-peekable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0d05e1c0dbad51b52c38bda7adceef61b9efc2baf04acfe8726a8c4630a6f57" + [[package]] name = "downcast-rs" version = "1.2.0" @@ -2687,9 +2695,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "fdeflate" @@ -2759,6 +2767,22 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "fjall" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caa8b3cbbdfa775c311965846c523ae291327a5cc3e433479583922ff9527594" +dependencies = [ + "byteorder", + "crc32fast", + "fs_extra", + "log", + "lsm-tree", + "path-absolutize", + "std-semaphore", + "tempfile", +] + [[package]] name = "flate2" version = "1.0.28" @@ -3210,6 +3234,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "558b88954871f5e5b2af0e62e2e176c8bde7a6c2c4ed41b13d138d96da2e2cbd" +[[package]] +name = "guardian" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6817154789d2e9bb2af0486500e774af579d0e6539247044f06d803b141448b5" + [[package]] name = "h2" version = "0.3.24" @@ -4316,9 +4346,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" dependencies = [ "value-bag", ] @@ -4363,6 +4393,42 @@ dependencies = [ "hashbrown 0.14.3", ] +[[package]] +name = "lsm-tree" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "792f0f9d75b518035f7247774580ba60ee211d381237946e1a40609433f5573f" +dependencies = [ + "byteorder", + "chrono", + "crc32fast", + "crossbeam-skiplist", + "double-ended-peekable", + "fs_extra", + "guardian", + "log", + "lz4_flex", + "path-absolutize", + "quick_cache", + "rand 0.8.5", + "seahash", + "serde", + "serde_json", + "tempfile", +] + +[[package]] +name = "lsp-server" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248f65b78f6db5d8e1b1604b4098a28b43d21a8eb1deeca22b1c421b276c7095" +dependencies = [ + "crossbeam-channel", + "log", + "serde", + "serde_json", +] + [[package]] name = "lsp-types" version = "0.94.1" @@ -4376,6 +4442,28 @@ dependencies = [ "url", ] +[[package]] +name = "lsp-types" +version = "0.95.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e34d33a8e9b006cd3fc4fe69a921affa097bae4bb65f76271f4644f9a334365" +dependencies = [ + "bitflags 1.3.2", + "serde", + "serde_json", + "serde_repr", + "url", +] + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + [[package]] name = "mach" version = "0.3.2" @@ -5303,6 +5391,15 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "path-absolutize" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4af381fe79fa195b4909485d99f73a80792331df0625188e707854f0b3383f5" +dependencies = [ + "path-dedot", +] + [[package]] name = "path-clean" version = "0.1.0" @@ -5315,6 +5412,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" +[[package]] +name = "path-dedot" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ba0ad7e047712414213ff67533e6dd477af0a4e1d14fb52343e53d30ea9397" +dependencies = [ + "once_cell", +] + [[package]] name = "path-slash" version = "0.2.1" @@ -5999,6 +6105,16 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" +[[package]] +name = "quick_cache" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "347e1a588d1de074eeb3c00eadff93db4db65aeb62aee852b1efd0949fe65b6c" +dependencies = [ + "equivalent", + "hashbrown 0.14.3", +] + [[package]] name = "quickcheck" version = "1.0.3" @@ -6849,9 +6965,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.197" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" dependencies = [ "serde_derive", ] @@ -6888,9 +7004,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" dependencies = [ "proc-macro2", "quote", @@ -6910,9 +7026,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.115" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ "indexmap 2.2.3", "itoa", @@ -6922,10 +7038,11 @@ dependencies = [ [[package]] name = "serde_path_to_error" -version = "0.1.11" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7f05c1d5476066defcdfacce1f52fc3cae3af1d3089727100c02ae92e5abbe0" +checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" dependencies = [ + "itoa", "serde", ] @@ -7445,6 +7562,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "std-semaphore" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ae9eec00137a8eed469fb4148acd9fc6ac8c3f9b110f52cd34698c8b5bfa0e" + [[package]] name = "stop-token" version = "0.7.0" @@ -9181,15 +9304,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", - "fastrand 2.0.0", - "redox_syscall 0.3.5", + "fastrand 2.1.0", "rustix 0.38.31", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -9781,7 +9903,7 @@ dependencies = [ "dashmap", "futures", "httparse", - "lsp-types", + "lsp-types 0.94.1", "memchr", "serde", "serde_json", @@ -10044,6 +10166,28 @@ dependencies = [ "winapi", ] +[[package]] +name = "turbo-static" +version = "0.1.0" +dependencies = [ + "bincode", + "clap 4.5.2", + "crossbeam-channel", + "fjall", + "ignore", + "itertools 0.10.5", + "lsp-server", + "lsp-types 0.95.1", + "proc-macro2", + "serde", + "serde_json", + "serde_path_to_error", + "syn 2.0.58", + "tracing", + "tracing-subscriber", + "walkdir", +] + [[package]] name = "turbo-tasks" version = "0.1.0" @@ -11707,9 +11851,9 @@ checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" [[package]] name = "value-bag" -version = "1.4.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4d330786735ea358f3bc09eea4caa098569c1c93f342d9aca0514915022fe7e" +checksum = "5a84c137d37ab0142f0f2ddfe332651fdbf252e7b7dbb4e67b6c1f1b2e925101" [[package]] name = "vcpkg" @@ -11968,9 +12112,9 @@ checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", diff --git a/Cargo.toml b/Cargo.toml index a03b0b7aed82aa..01634ba05709a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,8 @@ resolver = "2" members = [ "crates/node-file-trace", - "crates/tower-uds", + "crates/tower-uds", + "crates/turbo-static", "crates/turbo-tasks*", "crates/turbopack*", "crates/turborepo*", diff --git a/crates/turbo-static/.gitignore b/crates/turbo-static/.gitignore new file mode 100644 index 00000000000000..037a75e60775dd --- /dev/null +++ b/crates/turbo-static/.gitignore @@ -0,0 +1,2 @@ +file +graph.cypherl diff --git a/crates/turbo-static/Cargo.toml b/crates/turbo-static/Cargo.toml new file mode 100644 index 00000000000000..a5ec2982b907aa --- /dev/null +++ b/crates/turbo-static/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "turbo-static" +version = "0.1.0" +edition = "2021" + +[dependencies] +bincode = "1.3.3" +clap = { workspace = true, features = ["derive"] } +crossbeam-channel = "0.5.12" +fjall = { version = "0.6.3", features = ["bloom"] } +ignore = "0.4.22" +itertools.workspace = true +lsp-server = "0.7.6" +lsp-types = "0.95.1" +proc-macro2 = { workspace = true, features = ["span-locations"] } +serde = { workspace = true, features = ["derive"] } +serde_json.workspace = true +serde_path_to_error = "0.1.16" +syn = { version = "2", features = ["parsing", "full", "visit", "extra-traits"] } +tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } +tracing.workspace = true +walkdir = "2.5.0" + +[lints] +workspace = true diff --git a/crates/turbo-static/readme.md b/crates/turbo-static/readme.md new file mode 100644 index 00000000000000..47a5fde4c17143 --- /dev/null +++ b/crates/turbo-static/readme.md @@ -0,0 +1,29 @@ +# Turbo Static + +Leverages rust-analyzer to build a complete view into the static dependency graph for +your turbo tasks project. + +## How it works + +- find all occurences of #[turbo_tasks::function] across all the packages you want to query +- for each of the tasks we find, query rust analyzer to see which tasks call them +- apply some very basis control flow analysis to determine whether the call is make 1 time, 0/1 times, or 0+ times, + corresponding to direct calls, conditionals, or for loops. nested conditionals collapse + +## Usage + +This uses an in memory persisted database to cache rust-analyzer queries. +To reset the cache, pass the `--reindex` flag. Running will produce a +`graph.cypherl` file which can be loaded into any cypher-compatible database. + +```bash +# run neoj4 +docker run \ + --publish=7474:7474 --publish=7687:7687 \ + --volume=$HOME/neo4j/data:/data \ + neo4j +# run it passing in the root folders you want to analyze. +# the system will recursively parse all rust code looking +# for turbo tasks functions +cargo run --release -- ../../../turbo ../../../next.js +``` diff --git a/crates/turbo-static/src/call_resolver.rs b/crates/turbo-static/src/call_resolver.rs new file mode 100644 index 00000000000000..0c1d659a5e11b2 --- /dev/null +++ b/crates/turbo-static/src/call_resolver.rs @@ -0,0 +1,125 @@ +use fjall::PartitionCreateOptions; + +use crate::{lsp_client::RAClient, Identifier, IdentifierReference}; + +pub struct CallResolver<'a> { + client: &'a mut RAClient, + fjall: &'a fjall::Keyspace, + handle: fjall::PartitionHandle, +} + +impl<'a> CallResolver<'a> { + pub fn new(client: &'a mut RAClient, fjall: &'a fjall::Keyspace) -> Self { + let handle = fjall + .open_partition("links", PartitionCreateOptions::default()) + .unwrap(); + Self { + client, + fjall, + handle, + } + } + + pub fn cached(&self) -> usize { + self.handle.len().unwrap() + } + + pub fn cleared(mut self) -> Self { + self.fjall.delete_partition(self.handle).unwrap(); + self.handle = self + .fjall + .open_partition("links", PartitionCreateOptions::default()) + .unwrap(); + self + } + + pub fn resolve(&mut self, ident: &Identifier) -> Vec { + if let Some(data) = self.handle.get(ident.to_string()).unwrap() { + tracing::info!("skipping {}", ident); + return bincode::deserialize(&data).unwrap(); + }; + + tracing::info!("checking {}", ident); + + let mut count = 0; + let _response = loop { + let response = self.client.request(lsp_server::Request { + id: 1.into(), + method: "textDocument/prepareCallHierarchy".to_string(), + params: serde_json::to_value(&lsp_types::CallHierarchyPrepareParams { + text_document_position_params: lsp_types::TextDocumentPositionParams { + position: ident.range.start, + text_document: lsp_types::TextDocumentIdentifier { + uri: lsp_types::Url::from_file_path(&ident.path).unwrap(), + }, + }, + work_done_progress_params: lsp_types::WorkDoneProgressParams { + work_done_token: Some(lsp_types::ProgressToken::String( + "prepare".to_string(), + )), + }, + }) + .unwrap(), + }); + if let Some(Some(value)) = response.result.as_ref().map(|r| r.as_array()) { + if !value.is_empty() { + break value.to_owned(); + } + count += 1; + } + + // textDocument/prepareCallHierarchy will sometimes return an empty array so try + // at most 5 times + if count > 5 { + tracing::warn!("discovered isolated task {}", ident); + break vec![]; + } + + std::thread::sleep(std::time::Duration::from_secs(1)); + }; + + // callHierarchy/incomingCalls + let response = self.client.request(lsp_server::Request { + id: 1.into(), + method: "callHierarchy/incomingCalls".to_string(), + params: serde_json::to_value(lsp_types::CallHierarchyIncomingCallsParams { + partial_result_params: lsp_types::PartialResultParams::default(), + item: lsp_types::CallHierarchyItem { + name: ident.name.to_owned(), + kind: lsp_types::SymbolKind::FUNCTION, + data: None, + tags: None, + detail: None, + uri: lsp_types::Url::from_file_path(&ident.path).unwrap(), + range: ident.range, + selection_range: ident.range, + }, + work_done_progress_params: lsp_types::WorkDoneProgressParams { + work_done_token: Some(lsp_types::ProgressToken::String("prepare".to_string())), + }, + }) + .unwrap(), + }); + + let links = if let Some(e) = response.error { + tracing::warn!("unable to resolve {}: {:?}", ident, e); + vec![] + } else { + let response: Result, _> = + serde_path_to_error::deserialize(response.result.unwrap()); + + response + .unwrap() + .into_iter() + .map(|i| i.into()) + .collect::>() + }; + + let data = bincode::serialize(&links).unwrap(); + + tracing::debug!("links: {:?}", links); + + self.handle.insert(ident.to_string(), data).unwrap(); + links + } +} diff --git a/crates/turbo-static/src/identifier.rs b/crates/turbo-static/src/identifier.rs new file mode 100644 index 00000000000000..c6f62f2f0b670a --- /dev/null +++ b/crates/turbo-static/src/identifier.rs @@ -0,0 +1,99 @@ +use std::{fs, path::PathBuf}; + +use lsp_types::{CallHierarchyIncomingCall, CallHierarchyItem, Range}; + +/// A task that references another, with the range of the reference +#[derive(Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize, Clone, Debug)] +pub struct IdentifierReference { + pub identifier: Identifier, + pub references: Vec, // the places where this identifier is used +} + +/// identifies a task by its file, and range in the file +#[derive(Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize, Clone)] +pub struct Identifier { + pub path: String, + // technically you can derive this from the name and range but it's easier to just store it + pub name: String, + // post_transform_name: Option, + pub range: lsp_types::Range, +} + +impl Identifier { + /// check the span matches and the text matches + /// + /// `same_location` is used to check if the location of the identifier is + /// the same as the other + pub fn equals_ident(&self, other: &syn::Ident, match_location: bool) -> bool { + *other == self.name + && (!match_location + || (self.range.start.line == other.span().start().line as u32 + && self.range.start.character == other.span().start().column as u32)) + } + + fn get_name(item: &CallHierarchyItem) -> String { + // open file, find range inside, extract text + let file = fs::read_to_string(item.uri.path()).unwrap(); + let start = item.selection_range.start; + let end = item.selection_range.end; + file.lines() + .nth(start.line as usize) + .unwrap() + .chars() + .skip(start.character as usize) + .take(end.character as usize - start.character as usize) + .collect() + } +} + +impl From<(PathBuf, syn::Ident)> for Identifier { + fn from((path, ident): (PathBuf, syn::Ident)) -> Self { + Self { + path: path.display().to_string(), + name: ident.to_string(), + // post_transform_name: None, + range: Range { + start: lsp_types::Position { + line: ident.span().start().line as u32 - 1, + character: ident.span().start().column as u32, + }, + end: lsp_types::Position { + line: ident.span().end().line as u32 - 1, + character: ident.span().end().column as u32, + }, + }, + } + } +} + +impl From for IdentifierReference { + fn from(item: CallHierarchyIncomingCall) -> Self { + Self { + identifier: Identifier { + name: Identifier::get_name(&item.from), + // post_transform_name: Some(item.from.name), + path: item.from.uri.path().to_owned(), + range: item.from.selection_range, + }, + references: item.from_ranges, + } + } +} + +impl std::fmt::Debug for Identifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self, f) + } +} + +impl std::fmt::Display for Identifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}:{}#{}", + self.path, + self.range.start.line, + self.name.to_string(), + ) + } +} diff --git a/crates/turbo-static/src/lsp_client.rs b/crates/turbo-static/src/lsp_client.rs new file mode 100644 index 00000000000000..c6627c82217d96 --- /dev/null +++ b/crates/turbo-static/src/lsp_client.rs @@ -0,0 +1,159 @@ +use std::{path::PathBuf, process}; + +use crossbeam_channel::bounded; +use lsp_server::Message; + +/// An LSP client for Rust Analyzer (RA) that launches it as a subprocess. +pub struct RAClient { + /// Handle to the client + handle: process::Child, + sender: Option>, + receiver: Option>, +} + +impl RAClient { + /// Create a new LSP client for Rust Analyzer. + pub fn new() -> Self { + let stdin = process::Stdio::piped(); + let stdout = process::Stdio::piped(); + let stderr = process::Stdio::inherit(); + + let child = process::Command::new("rust-analyzer") + .stdin(stdin) + .stdout(stdout) + .stderr(stderr) + // .env("RA_LOG", "info") + .env("RUST_BACKTRACE", "1") + .spawn() + .expect("Failed to start RA LSP server"); + Self { + handle: child, + sender: None, + receiver: None, + } + } + + pub fn start(&mut self, folders: &[PathBuf]) { + let stdout = self.handle.stdout.take().unwrap(); + let mut stdin = self.handle.stdin.take().unwrap(); + + let (writer_sender, writer_receiver) = bounded::(0); + _ = std::thread::spawn(move || { + writer_receiver + .into_iter() + .try_for_each(|it| it.write(&mut stdin)) + }); + + let (reader_sender, reader_receiver) = bounded::(0); + _ = std::thread::spawn(move || { + let mut reader = std::io::BufReader::new(stdout); + while let Ok(Some(msg)) = Message::read(&mut reader) { + reader_sender + .send(msg) + .expect("receiver was dropped, failed to send a message"); + } + }); + + self.sender = Some(writer_sender); + self.receiver = Some(reader_receiver); + + let workspace_paths = folders + .iter() + .map(|p| std::fs::canonicalize(p).unwrap()) + .map(|p| lsp_types::WorkspaceFolder { + name: p.file_name().unwrap().to_string_lossy().to_string(), + uri: lsp_types::Url::from_file_path(p).unwrap(), + }) + .collect::>(); + + _ = self.request(lsp_server::Request { + id: 1.into(), + method: "initialize".to_string(), + params: serde_json::to_value(lsp_types::InitializeParams { + workspace_folders: Some(workspace_paths), + process_id: Some(std::process::id()), + capabilities: lsp_types::ClientCapabilities { + workspace: Some(lsp_types::WorkspaceClientCapabilities { + workspace_folders: Some(true), + ..Default::default() + }), + ..Default::default() + }, + work_done_progress_params: lsp_types::WorkDoneProgressParams { + work_done_token: Some(lsp_types::ProgressToken::String("prepare".to_string())), + }, + // we use workspace_folders so root_path and root_uri can be + // empty + ..Default::default() + }) + .unwrap(), + }); + + self.notify(lsp_server::Notification { + method: "initialized".to_string(), + params: serde_json::to_value(lsp_types::InitializedParams {}).unwrap(), + }); + } + + pub fn request(&mut self, message: lsp_server::Request) -> lsp_server::Response { + tracing::debug!("sending {:?}", message); + self.sender + .as_mut() + .unwrap() + .send(Message::Request(message)) + .expect("failed to send message"); + + loop { + match self + .receiver + .as_mut() + .unwrap() + .recv() + .expect("failed to receive message") + { + lsp_server::Message::Response(response) => { + tracing::debug!("received {:?}", response); + return response; + } + m => tracing::trace!("unexpected message: {:?}", m), + } + } + } + + pub fn notify(&mut self, message: lsp_server::Notification) { + self.sender + .as_mut() + .unwrap() + .send(Message::Notification(message)) + .expect("failed to send message"); + } +} + +impl Drop for RAClient { + fn drop(&mut self) { + if self.sender.is_some() { + let resp = self.request(lsp_server::Request { + id: 1.into(), + method: "shutdown".to_string(), + params: serde_json::to_value(()).unwrap(), + }); + + if resp.error.is_none() { + tracing::info!("shutting down RA LSP server"); + self.notify(lsp_server::Notification { + method: "exit".to_string(), + params: serde_json::to_value(()).unwrap(), + }); + self.handle + .wait() + .expect("failed to wait for RA LSP server"); + tracing::info!("shut down RA LSP server"); + } else { + tracing::error!("failed to shutdown RA LSP server: {:#?}", resp); + } + } + + self.sender = None; + self.receiver = None; + } +} diff --git a/crates/turbo-static/src/main.rs b/crates/turbo-static/src/main.rs new file mode 100644 index 00000000000000..9c02d7566db06f --- /dev/null +++ b/crates/turbo-static/src/main.rs @@ -0,0 +1,276 @@ +use std::{ + collections::{HashMap, HashSet}, + error::Error, + fs, + path::PathBuf, +}; + +use call_resolver::CallResolver; +use clap::Parser; +use fjall::Config; +use identifier::{Identifier, IdentifierReference}; +use itertools::Itertools; +use syn::visit::Visit; +use visitor::CallingStyleVisitor; + +use crate::visitor::CallingStyle; + +mod call_resolver; +mod identifier; +mod lsp_client; +mod visitor; + +#[derive(Parser)] +struct Opt { + #[clap(required = true)] + paths: Vec, + + /// reparse all files + #[clap(long)] + reparse: bool, + + /// reindex all files + #[clap(long)] + reindex: bool, +} + +fn main() -> Result<(), Box> { + tracing_subscriber::fmt::init(); + let opt = Opt::parse(); + + let mut connection = lsp_client::RAClient::new(); + connection.start(&opt.paths); + + // Each partition is its own physical LSM-tree + let fjall = Config::new("file").open()?; + + let call_resolver = CallResolver::new(&mut connection, &fjall); + let mut call_resolver = if opt.reindex { + call_resolver.cleared() + } else { + call_resolver + }; + + tracing::info!("getting tasks"); + let mut tasks = get_all_tasks(&opt.paths); + let dep_tree = resolve_tasks(&mut tasks, &mut call_resolver); + let concurrency = resolve_concurrency(&tasks, &dep_tree); + + write_dep_tree(&tasks, concurrency, std::path::Path::new("graph.cypherl")); + + Ok(()) +} + +/// search the given folders recursively and attempt to find all tasks inside +#[tracing::instrument(skip_all)] +fn get_all_tasks(folders: &[PathBuf]) -> HashMap> { + let mut out = HashMap::new(); + + for folder in folders { + let walker = ignore::Walk::new(folder); + for entry in walker { + let entry = entry.unwrap(); + let rs_file = if let Some(true) = entry.file_type().map(|t| t.is_file()) { + let path = entry.path(); + let ext = path.extension().unwrap_or_default(); + if ext == "rs" { + std::fs::canonicalize(path).unwrap() + } else { + continue; + } + } else { + continue; + }; + + let file = fs::read_to_string(&rs_file).unwrap(); + let lines = file.lines(); + let mut occurences = vec![]; + + tracing::debug!("processing {}", rs_file.display()); + + for ((_, line), (line_no, _)) in lines.enumerate().tuple_windows() { + if line.contains("turbo_tasks::function") { + tracing::debug!("found at {:?}:L{}", rs_file, line_no); + occurences.push(line_no + 1); + } + } + + if occurences.is_empty() { + continue; + } + + // parse the file using syn and get the span of the functions + let file = syn::parse_file(&file).unwrap(); + let occurences_count = occurences.len(); + let mut visitor = visitor::TaskVisitor::new(); + syn::visit::visit_file(&mut visitor, &file); + if visitor.results.len() != occurences_count { + tracing::warn!( + "file {:?} passed the heuristic with {:?} but the visitor found {:?}", + rs_file, + occurences_count, + visitor.results.len() + ); + } + + out.extend( + visitor + .results + .into_iter() + .map(move |(ident, tags)| ((rs_file.clone(), ident).into(), tags)), + ) + } + } + + out +} + +/// Given a list of tasks, get all the tasks that call that one +fn resolve_tasks( + tasks: &mut HashMap>, + client: &mut CallResolver, +) -> HashMap> { + tracing::info!( + "found {} tasks, of which {} cached", + tasks.len(), + client.cached() + ); + + let mut unresolved = tasks.keys().cloned().collect::>(); + let mut resolved = HashMap::new(); + + while let Some(top) = unresolved.iter().next().cloned() { + unresolved.remove(&top); + + let callers = client.resolve(&top); + + // add all non-task callers to the unresolved list if they are not in the + // resolved list + for caller in callers.iter() { + if !resolved.contains_key(&caller.identifier) + && !unresolved.contains(&caller.identifier) + { + tracing::debug!("adding {} to unresolved", caller.identifier); + unresolved.insert(caller.identifier.to_owned()); + } + } + resolved.insert(top.to_owned(), callers); + } + + resolved +} + +/// given a map of tasks and functions that call it, produce a map of tasks and +/// those tasks that it calls +/// +/// returns a list of pairs with a task, the task that calls it, and the calling +/// style +fn resolve_concurrency( + task_list: &HashMap>, + dep_tree: &HashMap>, // pairs of tasks and call trees +) -> Vec<(Identifier, Identifier, CallingStyle)> { + // println!("{:?}", dep_tree); + // println!("{:#?}", task_list); + + let mut edges = vec![]; + + for (ident, references) in dep_tree { + for reference in references { + if !dep_tree.contains_key(&reference.identifier) { + // this is a task that is not in the task list + // so we can't resolve it + tracing::error!("missing task for {}: {}", ident, reference.identifier); + for task in task_list.keys() { + if task.name == reference.identifier.name { + // we found a task that is not in the task list + // so we can't resolve it + tracing::trace!("- found {}", task); + continue; + } + } + continue; + } else { + // load the source file and get the calling style + let mut visitor = CallingStyleVisitor::new(reference.to_owned()); + tracing::info!("looking for {} from {}", ident, reference.identifier); + let file = + syn::parse_file(&fs::read_to_string(&reference.identifier.path).unwrap()) + .unwrap(); + visitor.visit_file(&file); + + edges.push(( + ident.clone(), + reference.identifier.clone(), + visitor.result().unwrap_or(CallingStyle::Once), + )); + } + .clone() + } + } + + // parse each fn between parent and child and get the max calling style + + edges +} + +/// Write the dep tree into the given file using cypher syntax +fn write_dep_tree( + task_list: &HashMap>, + dep_tree: Vec<(Identifier, Identifier, CallingStyle)>, + out: &std::path::Path, +) { + use std::io::Write; + + let mut node_ids = HashMap::new(); + let mut counter = 0; + + let mut file = std::fs::File::create(out).unwrap(); + + let empty = vec![]; + + // collect all tasks as well as all intermediate nodes + // tasks come last to ensure the tags are preserved + let node_list = dep_tree + .iter() + .flat_map(|(dest, src, _)| [(src, &empty), (dest, &empty)]) + .chain(task_list) + .collect::>(); + + for (ident, tags) in node_list { + counter += 1; + + let label = if !task_list.contains_key(ident) { + "Function" + } else if tags.contains(&"fs".to_string()) || tags.contains(&"network".to_string()) { + "ImpureTask" + } else { + "Task" + }; + + _ = writeln!( + file, + "CREATE (n_{}:{} {{name: '{}', file: '{}', line: {}, tags: [{}]}})", + counter, + label, + ident.name, + ident.path, + ident.range.start.line, + tags.iter().map(|t| format!("\"{}\"", t)).join(",") + ); + node_ids.insert(ident, counter); + } + + for (dest, src, style) in &dep_tree { + let style = match style { + CallingStyle::Once => "ONCE", + CallingStyle::ZeroOrOnce => "ZERO_OR_ONCE", + CallingStyle::ZeroOrMore => "ZERO_OR_MORE", + CallingStyle::OneOrMore => "ONE_OR_MORE", + }; + + let src_id = *node_ids.get(src).unwrap(); + let dst_id = *node_ids.get(dest).unwrap(); + + _ = writeln!(file, "CREATE (n_{})-[:{}]->(n_{})", src_id, style, dst_id,); + } +} diff --git a/crates/turbo-static/src/visitor.rs b/crates/turbo-static/src/visitor.rs new file mode 100644 index 00000000000000..30105c467d8c4d --- /dev/null +++ b/crates/turbo-static/src/visitor.rs @@ -0,0 +1,211 @@ +//! A visitor that traverses the AST and collects all functions or methods that +//! are annotated with `#[turbo_tasks::function]`. + +use std::{collections::VecDeque, ops::Add}; + +use syn::{spanned::Spanned, visit::Visit, Expr, Meta}; + +pub struct TaskVisitor { + /// the list of results as pairs of an identifier and its tags + pub results: Vec<(syn::Ident, Vec)>, +} + +impl TaskVisitor { + pub fn new() -> Self { + Self { + results: Default::default(), + } + } +} + +impl Visit<'_> for TaskVisitor { + #[tracing::instrument(skip_all)] + fn visit_item_fn(&mut self, i: &syn::ItemFn) { + if let Some(tags) = extract_tags(i.attrs.iter()) { + tracing::trace!("L{}: {}", i.sig.ident.span().start().line, i.sig.ident,); + self.results.push((i.sig.ident.clone(), tags)); + } + } + + #[tracing::instrument(skip_all)] + fn visit_impl_item_fn(&mut self, i: &syn::ImplItemFn) { + if let Some(tags) = extract_tags(i.attrs.iter()) { + tracing::trace!("L{}: {}", i.sig.ident.span().start().line, i.sig.ident,); + self.results.push((i.sig.ident.clone(), tags)); + } + } +} + +fn extract_tags<'a>(mut meta: impl Iterator) -> Option> { + meta.find_map(|a| match &a.meta { + // path has two segments, turbo_tasks and function + Meta::Path(path) if path.segments.len() == 2 => { + let first = &path.segments[0]; + let second = &path.segments[1]; + (first.ident == "turbo_tasks" && second.ident == "function").then(std::vec::Vec::new) + } + Meta::List(list) if list.path.segments.len() == 2 => { + let first = &list.path.segments[0]; + let second = &list.path.segments[1]; + if first.ident != "turbo_tasks" || second.ident != "function" { + return None; + } + + // collect ident tokens as args + let tags: Vec<_> = list + .tokens + .clone() + .into_iter() + .filter_map(|t| { + if let proc_macro2::TokenTree::Ident(ident) = t { + Some(ident.to_string()) + } else { + None + } + }) + .collect(); + + Some(tags) + } + _ => { + tracing::trace!("skipping unknown annotation"); + None + } + }) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd)] +pub enum CallingStyle { + Once, + #[allow(dead_code)] + ZeroOrOnce, + #[allow(dead_code)] + ZeroOrMore, + #[allow(dead_code)] + OneOrMore, +} + +impl CallingStyle { + fn bitset(self) -> u8 { + match self { + CallingStyle::Once => 0b0010, + CallingStyle::ZeroOrOnce => 0b011, + CallingStyle::ZeroOrMore => 0b0111, + CallingStyle::OneOrMore => 0b0110, + } + } +} + +impl Add for CallingStyle { + type Output = Self; + + /// Add two calling styles together to determine the calling style of the + /// target function within the source function. + /// + /// Consider it as a bitset over properties. + /// - 0b0001: Zero + /// - 0b0010: Once + /// - 0b0100: More Than Once + /// + /// Note that zero is not a valid calling style. + fn add(self, rhs: Self) -> Self { + let left = self.bitset(); + let right = rhs.bitset(); + match left | right { + 0b0010 => CallingStyle::Once, + 0b011 => CallingStyle::ZeroOrOnce, + 0b0111 => CallingStyle::ZeroOrMore, + 0b0110 => CallingStyle::OneOrMore, + _ => unreachable!(), + } + } +} + +pub struct CallingStyleVisitor { + pub reference: crate::IdentifierReference, + state: VecDeque, +} + +impl CallingStyleVisitor { + /// Create a new visitor that will traverse the AST and determine the + /// calling style of the target function within the source function. + pub fn new(reference: crate::IdentifierReference) -> Self { + Self { + reference, + state: Default::default(), + } + } + + pub fn result(self) -> Option { + self.state + .into_iter() + .map(|b| match b { + CallingStyleVisitorState::Block => CallingStyle::Once, + CallingStyleVisitorState::Loop => CallingStyle::ZeroOrMore, + CallingStyleVisitorState::If => CallingStyle::ZeroOrOnce, + CallingStyleVisitorState::Closure => CallingStyle::ZeroOrMore, + }) + .reduce(|a, b| a + b) + } +} + +#[derive(Debug, Clone, Copy)] +enum CallingStyleVisitorState { + Block, + Loop, + If, + Closure, +} + +impl Visit<'_> for CallingStyleVisitor { + fn visit_item_fn(&mut self, i: &'_ syn::ItemFn) { + if self.reference.identifier.equals_ident(&i.sig.ident, true) { + self.state.push_back(CallingStyleVisitorState::Block); + syn::visit::visit_item_fn(self, i); + self.state.pop_back(); + } + } + + fn visit_impl_item_fn(&mut self, i: &'_ syn::ImplItemFn) { + if self.reference.identifier.equals_ident(&i.sig.ident, true) { + self.state.push_back(CallingStyleVisitorState::Block); + syn::visit::visit_impl_item_fn(self, i); + self.state.pop_back(); + } + } + + fn visit_expr_loop(&mut self, i: &'_ syn::ExprLoop) { + self.state.push_back(CallingStyleVisitorState::Loop); + syn::visit::visit_expr_loop(self, i); + self.state.pop_back(); + } + + fn visit_expr_for_loop(&mut self, i: &'_ syn::ExprForLoop) { + self.state.push_back(CallingStyleVisitorState::Loop); + syn::visit::visit_expr_for_loop(self, i); + self.state.pop_back(); + } + + fn visit_expr_if(&mut self, i: &'_ syn::ExprIf) { + self.state.push_back(CallingStyleVisitorState::If); + syn::visit::visit_expr_if(self, i); + self.state.pop_back(); + } + + fn visit_expr_closure(&mut self, i: &'_ syn::ExprClosure) { + self.state.push_back(CallingStyleVisitorState::Closure); + syn::visit::visit_expr_closure(self, i); + self.state.pop_back(); + } + + fn visit_expr_call(&mut self, i: &'_ syn::ExprCall) { + match i.func.as_ref() { + Expr::Path(p) => { + println!("{:?} - {:?}", p.span(), self.reference.references) + } + rest => { + tracing::info!("visiting call: {:?}", rest); + } + } + } +}