diff --git a/Cargo.lock b/Cargo.lock index 029bc62a..99f35245 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -87,9 +87,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" [[package]] name = "anstyle-parse" @@ -156,7 +156,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -167,7 +167,7 @@ checksum = "461abc97219de0eaaf81fe3ef974a540158f3d079c2ab200f891f1a2ef201e85" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -338,7 +338,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.53", + "syn 2.0.74", "which", ] @@ -386,9 +386,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.3" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" dependencies = [ "arrayref", "arrayvec", @@ -429,9 +429,9 @@ checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c" [[package]] name = "bytemuck" -version = "1.16.1" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +checksum = "6fd4c6dcc3b0aea2f5c0b4b82c2b15fe39ddbc76041a310848f4706edf76bb31" [[package]] name = "byteorder" @@ -453,27 +453,30 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cbindgen" -version = "0.26.0" +version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da6bc11b07529f16944307272d5bd9b22530bc7d05751717c9d416586cedab49" +checksum = "3fce8dd7fcfcbf3a0a87d8f515194b49d6135acab73e18bd380d1d93bb1a15eb" dependencies = [ "heck 0.4.1", - "indexmap 1.9.3", + "indexmap 2.2.5", "log", "proc-macro2", "quote", "serde", "serde_json", - "syn 1.0.109", + "syn 2.0.74", "tempfile", - "toml 0.5.11", + "toml", ] [[package]] name = "cc" -version = "1.0.90" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" +checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48" +dependencies = [ + "shlex", +] [[package]] name = "certifier" @@ -592,9 +595,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.11" +version = "4.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3" +checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" dependencies = [ "clap_builder", "clap_derive", @@ -602,9 +605,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.11" +version = "4.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa" +checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" dependencies = [ "anstream", "anstyle", @@ -614,14 +617,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.11" +version = "4.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e" +checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -652,7 +655,7 @@ dependencies = [ "rust-ini", "serde", "serde_json", - "toml 0.8.12", + "toml", "yaml-rust", ] @@ 
-884,7 +887,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -908,7 +911,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -919,7 +922,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -1239,7 +1242,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -1946,7 +1949,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -2156,7 +2159,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -2295,7 +2298,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -2336,7 +2339,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -2522,7 +2525,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -2547,9 +2550,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -2619,7 +2622,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.53", + 
"syn 2.0.74", "tempfile", ] @@ -2633,7 +2636,7 @@ dependencies = [ "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -2811,9 +2814,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.5" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -2861,9 +2864,9 @@ checksum = "e898588f33fdd5b9420719948f9f2a32c922a246964576f71ba7f24f80610fbc" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" dependencies = [ "base64 0.22.1", "bytes", @@ -2899,7 +2902,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -2950,9 +2953,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afd55a67069d6e434a95161415f5beeada95a01c7b815508a82dcb0e1593682" +checksum = "7b423f0e62bdd61734b67cd21ff50871dfaeb9cc74f869dcd6af974fbcb19936" dependencies = [ "futures 0.3.30", "futures-timer", @@ -2962,9 +2965,9 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4165dfae59a39dd41d8dec720d3cbfbc71f69744efb480a3920f5d4e0cc6798d" +checksum = "c5e1711e7d14f74b12a58411c542185ef7fb7f2e7f8ee6e2940a883628522b42" dependencies = [ "cfg-if", "glob", @@ -2974,7 +2977,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.53", + "syn 2.0.74", "unicode-ident", ] @@ 
-3215,31 +3218,32 @@ checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.124" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -3302,7 +3306,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -3488,9 +3492,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.53" +version = "2.0.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7383cd0e49fff4b6b90ca5670bfd3e9d6a733b3f90c686605aa7eec8c4996032" +checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" dependencies = [ "proc-macro2", "quote", @@ -3508,6 +3512,9 @@ name = "sync_wrapper" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384595c11a4e2969895cad5a8c4029115f5ab956a9e5ef4de79d11a426e5f20c" +dependencies = [ + "futures-core", +] [[package]] name = "sysinfo" @@ -3526,20 +3533,20 @@ dependencies = [ [[package]] name = "system-configuration" 
-version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.5.0", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -3553,14 +3560,15 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tempfile" -version = "3.10.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3586,7 +3594,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -3666,9 +3674,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.2" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", @@ -3688,7 +3696,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", 
- "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -3737,15 +3745,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "toml" version = "0.8.12" @@ -3833,7 +3832,7 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -3904,7 +3903,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -4120,7 +4119,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", "wasm-bindgen-shared", ] @@ -4154,7 +4153,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4268,7 +4267,7 @@ checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] @@ -4279,7 +4278,18 @@ checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", +] + +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", ] [[package]] @@ -4303,18 +4313,18 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.48.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = 
"282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] name = "windows-sys" -version = "0.52.0" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets 0.52.6", ] @@ -4458,16 +4468,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "wyz" version = "0.5.1" @@ -4518,7 +4518,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.53", + "syn 2.0.74", ] [[package]] diff --git a/docs/profiler.md b/docs/profiler.md index 10428617..91fac40f 100644 --- a/docs/profiler.md +++ b/docs/profiler.md @@ -1,136 +1,195 @@ # What is the profiler tool -The major goal of profiler tool is to provide an estimation how fast can be the proof created. -Creating a proof in spacemesh depends on few major factors: +The primary aim of the profiler tool is to provide an estimation of how fast can the [Proof of Space-Time (PoST)](https://docs.spacemesh.io/docs/learn/post) be [generated](https://docs.spacemesh.io/docs/learn/post#generating-the-proof) given existing [Proof of Space (PoS)](https://docs.spacemesh.io/docs/learn/post#proof-of-space) data. 
Generating the PoST in Spacemesh depends on a few major factors: + * CPU speed * Disk speed -* Amount of storage initialized +* Amount of storage initialized, i.e., the size of the PoS data (how much space one allocates to [smeshing](https://docs.spacemesh.io/docs/start/smeshing/start#what-is-smeshing)) + +Thus, the profiler tool can help a smesher: + +* estimate how much storage one can safely initialize (to be able to generate a proof later). +* configure the proving process (by setting the desired number of CPU threads and [nonce](https://docs.spacemesh.io/docs/learn/post#generating-the-proof) count) optimally for the best use of the available resources. + +## Downloading the profiler tool + +The profiler tool can be downloaded from the [releases page](https://github.com/spacemeshos/post-rs/releases) of the [`post-rs`](https://github.com/spacemeshos/post-rs) repository. The download is located in the "Assets" section of the release page and will be visible after expanding this section by clicking on its title. The exact file to be downloaded for the different operating systems is as follows: + +* `profiler-linux-vX.X.X.zip` for Linux (Ubuntu or Fedora, x86) +* `profiler-linux-arm64-vX.X.X.zip` for Linux (Ubuntu or Fedora, Arm-based) +* `profiler-macos-m1-vX.X.X.zip` for macOS (M1-based) +* `profiler-macos-vX.X.X.zip` for macOS (Intel-based) +* `profiler-windows-vX.X.X.zip` for Windows (x86) + +It is important to understand that the proving time itself does not need to be optimized. It is enough to generate a proof within the specified time window, which on the mainnet is 12 hours. + +## Running the profiler tool + +This section will guide you on how to extract and run the profiler tool on different operating systems.
Once the appropriate `.zip` file is downloaded, follow the steps below to run the tool: -The primary goal of the profiler tool is to: -* estimate how much storage one can safely initalize (to be able to generate a proof later) -* configure proving (nonces and threads) optimally for the best use of the resources +### Linux/macOS -The best way to get the profiler tool is to get it from the [releases page](https://github.com/spacemeshos/post-rs/releases). +1. Extract the contents of the `.zip` file somewhere. The extracted contents will include a single executable file called `profiler`. +2. Make the `profiler` file executable by opening a terminal in the directory where the file has been extracted and entering this command: `chmod +x profiler`. +3. In the same terminal, run the now-executable file by entering this command: `./profiler`. -It's important to understand that the proving time itself does not need to be optimized. It's enough to generate proof in a specified time window, which on the mainnet is 12h. +### Windows -## How to run profiler tool +1. Extract the contents of the `.zip` file somewhere. The extracted contents will include a single executable file called `profiler.exe`. +2. Open a Windows PowerShell terminal in the directory where the file has been extracted. You can do this by holding the "shift" key, right-clicking, and selecting the "Open PowerShell window here" option. +3. In the PowerShell terminal, enter this command to run the profiler tool: `./profiler`. -For accurate results set the path with `--data-file` to the same disk that will be used by the node, otherwise the results of the benchmark might not reflect the actual performance of the node. Please be warned that the contents of that file MIGHT be overwritten. +Having run the tool, one may wonder, what does the output mean? Read on to understand the various input flags one can use to customize each profiler run and how to interpret the subsequent results.
-By default the profiler uses 1GiB sample data. This can be changed with the `--data-size` flag. Different sizes can be used to check for bottlenecks of the hardware. +## Understanding the options and commands -The duration of the benchmark can be set with `--duration` and defaults to 10 seconds. A longer duration will yield more accurate results. +When running the profiler tool, you can provide several inputs to it. This section goes over the various commands and options for the profiler tool and what they mean. -There are two parameters that can influence the proofing speed and can be optimized for go-spacemesh: `--threads` and `--nonces`. -The general rule of thumb is the more threads the faster, but because different processors may have different behaviours when all threads are under load. Please check different values for `--threads` to find the optimal one. -The more nonces are used with `--nonces` in ONE data pass, the bigger the chance to find a valid proof. The downside of using more `--nonces` is a heavier load on the CPU. It's important to note that nonces must be a multiple of 16. +### Commands -To simplify the estimation of nonces please use the following formula: -in google sheets format +* `proving`: We are telling the profiler to benchmark the entire PoST generation process. +* `pow`: We are telling the profiler to only benchmark the Proof of Work (PoW) part of the PoST generation. More on this below. + +If no command is given, the profiler runs the `proving` command by default. + +### Options + +* `--help`: Displays all the available commands and options along with their short descriptions. Use the `-h` flag for summarized help information. +* `--data-file` (path): This is the path to a file on the disk where we plan to store our PoS data. This ensures that the profiler will use the same disk to run the benchmarks as will be used to store the PoS, leading to more accurate results. Be warned that the contents of that file might be overwritten.
+ * **Default**: The profiler tool will generate a 1 GiB PoS data file in a temporary directory. +* `--data-size` (size in GiB): Size of the sample data file used for the benchmark. Increasing this value will give more accurate results. + * **Default**: A PoS data size of 1 GiB will be used for running the benchmark. +* `--duration` (in seconds): How long, in seconds, the profiler should run the PoST generation benchmark. A longer duration yields more accurate results. + * **Default**: 10 seconds. +* `--threads` (count): Number of CPU threads that will be used to run the benchmark. Generally, more threads mean faster PoST generation and a shorter generation time. However, different CPUs behave differently when all threads are under load. Thus, keep experimenting with different values for `--threads` (with the maximum being the number of threads your CPU has) to find the optimal thread count. + * **Default**: 4 threads. +* `--nonces` (count, multiple of 16): The number of nonces to use in the proof of work calculation in one pass over the PoS data. Read [this section](https://docs.spacemesh.io/docs/learn/post#generating-the-proof) of the PoST explainer to understand how nonces are used in the PoST generation process. The greater the `--nonces` value, the more nonces are used in each data pass, increasing the chance of finding the PoST sooner and in fewer data passes. Note that whatever value is used for `--nonces`, it must be a multiple of 16. One thing to be aware of when setting `--nonces` is that the higher the value used, the more stress the CPU will be under as this part of the PoST generation process is CPU-intensive. + * **Default**: 64 nonces.
+ +#### Nonce estimation formula + +To help us estimate the value of `--nonces` that will help find the PoST in the fewest passes, the following formula, given in two prominent formats for ease of calculation, can be used: + +**Google Sheets format**: ``` -1-(1-(1-BINOM.DIST(36;10^9;26/10^9;TRUE)))^{put nonces here} +1-(1-(1-BINOM.DIST(36;10^9;26/10^9;TRUE)))^{put nonces value here} ``` -in wolfram alpha format +**Wolfram Alpha format**: ``` -1-(1-(1-CDF[BinomialDistribution[10^9, 26/10^9],36]))^{put nonces here} +1-(1-(1-CDF[BinomialDistribution[10^9, 26/10^9],36]))^{put nonces value here} ``` -This formula gives the probability of finding a valid proof in one data pass. The node will perform multiple passes if needed. +The aforementioned formula gives the probability (with an output of 1 being 100%) of finding a valid PoST in one data pass. The node will perform multiple passes if a PoST is not found in one data pass. + +Please note that PoST generation speed scales *linearly* with the number of CPU cores used (assuming that they are equally fast) and *inversely* with the number of nonces in groups of 16 (32 nonces should be twice as slow as 16). This effect might not manifest itself until a high number of nonces is used. The reason for this is that in most setups, the hard disk drive (HDD) speed will become the limiting factor (as a spinning disk drive is used in most computers) if a low number of nonces is used. + +Ultimately, we should find a balance between the usage of these two resources to make the best use of both. The more CPU cores we use, the faster a valid PoST will be generated. The more nonces we use, the slower each data pass becomes, because the increased CPU usage makes the CPU the limiting factor. On the other hand, with a low number of nonces, the HDD becomes the limiting factor as the CPU is processing the data faster than the disk can read it.
Knowing this will help us make a better assessment of the profiler tool's inputs and outputs. -Please note that proving speed linearly scales with the number of cores used (assuming that they're equally fast), but inversely scales with the number of nonces in groups of 16 (32 nonces should be twice as slow as 16). This effect might not manifest itself until a high number of nonces is used. The reason is that in most setups the hard disk speed will be the limiting factor if a low number of nonces is used. +## How to interpret the profiler output -## How to interpret the results +We will now understand how to interpret the profiler output. Let us run the following command (for reference, all commands are being run on a 2020 MacBook Air M1 16GB/256GB): ``` -./profiler --data-size 1 --threads=1 --data-file data.bin --nonces=64 +./profiler --threads=1 +``` + +In the above terminal command, we are running the profiler in `proving` mode and have provided the following options: + +* The number of threads to be used is 1 (instead of the default 4). +* For all other options, default values (e.g., 64 for nonces) are to be used ([see above](#understanding-the-options-and-commands)). + +After running this command, we get the following output: + +```bash { - "time_s": 12.09140029, - "speed_gib_s": 0.41351703525481415 + "time_s": 10.316020166, + "speed_gib_s": 0.19387321542775668 } ``` -Here we see the following -* 1GiB file `data.bin` was used for the benchmark -* 1 thread was used -* 64 nonces were used +The output means that the benchmark ran for 10.31 seconds and the speed was 0.19 GiB/s. The profiler makes multiple passes over the data file, up to the configured `--duration` - the longer the duration, the more accurate the result (since it averages). From the [formula above](#nonce-estimation-formula), we know that the probability of finding a proof with 64 nonces is 79.39%. 
Therefore, there is a ~20% chance that at least two passes are necessary (and a ~ 0.20^x chance that more than x passes are necessary). -The benchmark took 12.09 seconds to complete and the speed was 0.41GiB/s. -From the formula above we know that the probability of finding a proof with 64 nonces is 79.39%. Therefore there is a ~20% chance that at least two passes are necessary (and a ~ 0.20^x chance that more than x passes are necessary). -``` -./profiler --data-size 1 --threads=1 --data-file data.bin --nonces=128 +Let us run another command, this time with the nonce count doubled to 128: + +```bash +./profiler --threads=1 --nonces=128 { - "time_s": 13.152850458, - "speed_gib_s": 0.22808744078552953 + "time_s": 16.450039208, + "speed_gib_s": 0.12158025733016843 } ``` -We see here that the speed dropped by half. That clearly indicates that CPU is our limiting factor. +We see here that the speed dropped by about 37%, which indicates that CPU is our limiting factor since we put more load on the CPU by doubling the nonces but only using 1 CPU thread. Note that with 128 nonces, there is a 95.75% chance of finding a PoST in one data pass. -With 128 nonces there is 95.75% chance to find a proof in one data pass. +Let us run the profiler again, this time with 128 nonces but 10 threads: -``` -./profiler --data-size 1 --threads=10 --data-file data.bin --nonces=128 +```bash +./profiler --threads=10 --nonces=128 { - "time_s": 10.331206291, - "speed_gib_s": 1.8390882405040923 + "time_s": 11.323834001, + "speed_gib_s": 0.6181651902864203 } ``` -With 10 threads we can see that speed increased by 8 times. It is important to optimize for the probability of finding a proof in time rather than optimizing for the speed. On mainnet nodes have a 12 hour window to find and submit a proof. - -Based on these outputs you need to decide what is the best configuration for your hardware. Please note that the speed of the proof generation is not the only factor. 
+With 10 threads we can see that the speed increased roughly fivefold (~5x, from 0.12 GiB/s to 0.62 GiB/s). It is important to always optimize for the probability of finding a proof in time, within the 12-hour PoST submission window, rather than optimizing for speed. Based on these outputs, you need to decide what is the best configuration for your particular system. Please note that the speed of the proof generation is not the only factor. -## Is that all that is happening during the proof generation? -Additionally for every group of 16 nonces there is an additional computation - often referred to as `k2pow` - required. It serves as mitigation against some possible attacks by dishonest smeshers. +## What else happens during the PoST generation? -On the mainnet, each set of 16 nonces requires one `k2pow` computation. In the case of a low-end CPU with a hash rate of 500 h/s in the [RandomX benchmark](https://xmrig.com/benchmark), approximately 2 minutes and 30 seconds are needed to create a PoWs for 4 SUs (a Space Unit on the mainnet equates to 64GiB) and 64 nonces (4xPoW). This processing time scales down linearly with the hash rate and it scales up linearily with the number of SUs. You may want to check out the single and multicore results from the benchmark for more details. +For every group of 16 nonces, there is an additional computation - often referred to as `k2pow` - that is performed during the PoST generation process. It serves as mitigation against some possible attacks by dishonest smeshers. -Please add your estimate (number of SU x the result of the RandomX benchmark) to the total time needed to generate a proof. +On the mainnet, each set of 16 nonces requires one `k2pow` computation. In the case of a low-end CPU with a hash rate of 500 h/s in the [RandomX benchmark](https://xmrig.com/benchmark), approximately 2 minutes and 30 seconds are needed to create a PoW for 4 SUs (a Space Unit on the mainnet equates to 64GiB) and 64 nonces (4xPoW).
This processing time scales down linearly with the hash rate and it scales up linearly with the number of SUs. You may want to check the single and multi-core results from the RandomX benchmark for more details. Please add your estimate (number of SU x the result of the RandomX benchmark) to the total time needed to generate a proof. ## Benchmarking K2 PoW -The `profiler` allows benchmarking the speed of proof of work. The profiler always executes PoW for 1 SU to speed up measurement and automatically scales up the result by the requested number of units. -To understand the inner mechanics of RandomX proof of work, take a look at its [specification](https://github.com/tevador/RandomX/blob/master/doc/specs.md). +The `profiler` allows benchmarking of the PoW computation speed. The profiler always executes PoW for 1 SU to speed up measurement and automatically scales up the result by the requested number of units. -Refer to `profiler pow --help` to understand how to use the profiler to benchmark the K2 PoW. Most users will need to tweak three arguments: +To understand the inner mechanics of RandomX PoW, take a look at its [specification](https://github.com/tevador/RandomX/blob/master/doc/specs.md). + +Refer to `profiler pow --help` to understand how to use the profiler to benchmark the `k2pow`. Most users will need to tweak three arguments: * `--threads`, * `--num-units`, * `--nonces` ### Example + `profiler pow --nonces 288 --num-units 16 --iterations 10 --threads 2 --randomx-mode fast` -# Tips & Hints +## Tips & Hints + +### How to verify that the CPU is the limiting factor? + +If `speed_gib_s` is slowing down as nonces are increased, then the CPU is most likely the limiting factor. Try to add more threads only if needed. -### How to see that CPU is my limiting factor? -If `speed_gib_s` is slowing down with more noces, then CPU is most likely the limiting factor. Try to add more threads IF needed +### How to verify that the HDD is the limiting factor? 
-### How to see that disk is my limiting factor? -If `speed_gib_s` is not slowing down with more nonces, then disk is most likely the limiting factor. Try to add more nonces IF needed. That should put more load on CPU and limit the probability of reading the data again from the disk. +If `speed_gib_s` is not slowing down with more nonces, then the HDD is most likely the limiting factor. As stated earlier, try to add more nonces only if needed. While increasing nonces puts more load on the CPU, it also increases the chances of finding the proof after a single pass over the PoS data. ### How do I find the sweet spot? -That depends, if you want to generate proof as fast as possible because you cannot have your computer working for 12hours then make it work as hard as possible regardless of anything else. -The general rule of thumb is that you want to match your disk speed with that benchmark. If your disk will be faster then CPU will be working on 100% and disk will NOT be fully utilized, if your CPU is faster than your HDD then the CPU will be waiting for the data from the disk. +That depends. If you want to generate a PoST as fast as possible because you cannot have your computer working for 12 hours, then make it work as hard as possible regardless of anything else. -### Do I need to finish the proof asap? -No, the proof needs to be done within a time window. On the mainnet that time window is 12hours. As long as you're able to generate a proof with high probability within this time, you are good. +The general rule of thumb is that you want to match your HDD speed with that benchmark. If your HDD is faster than the CPU, then the CPU will be at 100% utilization while the HDD will NOT be fully utilized. If your CPU is faster than your HDD then the CPU will be waiting for the data from the HDD. -### Can I really use whole 12hours?
-Yes, you can, but please be warned that if you're using the whole 12hours then you're risking that you will not be able to submit the proof in time. Not submitting a proof means skipping an epoch (2 weeks on the mainnet). Please leave some buffer for occasional slowdowns on your side. +### Do I need to finish finding the PoST ASAP? +No. The PoST needs to be done within a time window. On the mainnet, this time window is 12 hours long. As long as you are able to generate a PoST with a high probability within this time, you are good. However, you should not wait until the very end of the window. Try to find and submit a PoST at your earliest convenience to avoid accidentally missing the time window and not being eligible for that epoch. -## How to use the values +### Can I use the entire 12-hour window to submit the PoST? -``` +Yes, you can. However, be warned that you should not wait until the very end of the window to submit the proof. Try to find and submit a PoST at your earliest convenience to avoid accidentally missing the time window and not being eligible for that epoch. Not submitting a proof during this window means skipping an epoch (2 weeks on the mainnet). **Please leave some buffer for occasional slowdowns or any other unforeseen circumstances on your side**. + +## How to use the profiler output values? + +Once you have found your perfect nonces and threads values, enter them in the node config file. For example: + +```JSON "smeshing-proving-opts": { "smeshing-opts-proving-nonces": 144, - "smeshing-opts-proving-threads": 0 + "smeshing-opts-proving-threads": 10 }, ``` -Place that in your node config under `smeshing` key. Please note that the values are just an example and you need to use your own values. This is a `json` file so please make sure that it's formatted properly. +Place that in your node config JSON file under the `smeshing` key. Please note that the values are just an example and you need to use your own values.
Node config is a `JSON` file so please make sure that the values are formatted properly. -If you're using `smapp` then please put it to the node custom config file named `node-config.7c8cef2b.json` in Spacemesh app directory. +For Smapp users, the node config file is titled: `node-config.7c8cef2b.json` and is located in the Spacemesh directory.