From ac22b88bf974360040d5b4e41273e296451c7f6e Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 9 Aug 2024 09:22:12 -0400 Subject: [PATCH] cargo: straighten out LTO configuration This PR tweaks the change made in #5904 so that the `profiling` Cargo profile does _not_ have LTO enabled. With LTO enabled, compile times even after just doing a `touch crates/uv/src/bin/uv.rs` are devastating: $ cargo b --profile profiling -p uv Compiling uv-cli v0.0.1 (/home/andrew/astral/uv/crates/uv-cli) Compiling uv v0.2.34 (/home/andrew/astral/uv/crates/uv) Finished `profiling` profile [optimized + debuginfo] target(s) in 3m 47s Even with `lto = "thin"`, compile times are not great, but an improvement: $ cargo b --profile profiling -p uv Compiling uv v0.2.34 (/home/andrew/astral/uv/crates/uv) Finished `profiling` profile [optimized + debuginfo] target(s) in 53.98s But our original configuration for `profiling`, prior to #5904, was with LTO completely disabled: $ cargo b --profile profiling -p uv Compiling uv v0.2.34 (/home/andrew/astral/uv/crates/uv) Finished `profiling` profile [optimized + debuginfo] target(s) in 30.09s This gives reasonable-ish compile times, although I still want them to be better. This setup does risk that we are measuring something in benchmarks that we are shipping, but in order to make those two the same, we'd either need to make compile times way worse for development, or take a hit to binary size and a slight hit to runtime performance in our release builds. I would weakly prefer that we accept the hit to runtime performance and binary size in order to bring our measurements in line with what we ship, but I _strongly_ feel that we should not have compile times exceeding minutes for development. When doing performance testing, long compile times, for me anyway, break "flow" state. A confounding factor here was that #5904 enabled LTO for the `release` profile, but the `dist` profile (used by `cargo dist`) was still setting it to `lto = "thin"`. However, because of shenanigans in our release pipeline, we we actually using the `release` profile for binaries we ship. This PR does not make any changes here other than to remove `lto = "thin"` from the `dist` profile to make the fact that they are the same a bit clearer. cc @davfsa --- Cargo.toml | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 22cf050689509..7bd3237c09588 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -205,12 +205,49 @@ rest_pat_in_fully_bound_structs = "warn" [profile.release] strip = true -lto = true +lto = "fat" +# This profile is meant to mimic the `release` profile as closely as +# possible, but using settings that are more beneficial for iterative +# development. That is, the `release` profile is intended for actually +# building the release, where as `profiling` is meant for building `uv` +# for running benchmarks. +# +# The main differences here are to avoid stripping debug information +# and disabling lto. This does result in a mismatch between our release +# configuration and our benchmarking configuration, which is unfortunate. +# But compile times with `lto = true` are completely untenable: +# +# $ cargo b --profile profiling -p uv +# Compiling uv-cli v0.0.1 (/home/andrew/astral/uv/crates/uv-cli) +# Compiling uv v0.2.34 (/home/andrew/astral/uv/crates/uv) +# Finished `profiling` profile [optimized + debuginfo] target(s) in 3m 47s +# +# Using `lto = "thin"` brings a massive improvement, but it's still slow: +# +# $ cargo b --profile profiling -p uv +# Compiling uv v0.2.34 (/home/andrew/astral/uv/crates/uv) +# Finished `profiling` profile [optimized + debuginfo] target(s) in 53.98s +# +# But with `lto = false`: +# +# $ cargo b --profile profiling -p uv +# Compiling uv v0.2.34 (/home/andrew/astral/uv/crates/uv) +# Finished `profiling` profile [optimized + debuginfo] target(s) in 30.09s +# +# We get more reasonable-ish compile times. At least, it's not enough +# time to get up and get a cup of coffee before it completes. +# +# This setup does risk that we are measuring something in benchmarks +# that we are shipping, but in order to make those two the same, we'd +# either need to make compile times way worse for development, or take +# a hit to binary size and a slight hit to runtime performance in our +# release builds. [profile.profiling] inherits = "release" strip = false -debug = true +debug = "full" +lto = false [profile.fast-build] inherits = "dev" @@ -220,7 +257,6 @@ strip = "debuginfo" # The profile that 'cargo dist' will build with. [profile.dist] inherits = "release" -lto = "thin" # Config for 'cargo dist' [workspace.metadata.dist]