Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stabilize the size of incr comp object file names #123441

Merged
merged 1 commit into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions compiler/rustc_codegen_gcc/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use gccjit::{
use rustc_codegen_ssa::base::wants_msvc_seh;
use rustc_codegen_ssa::errors as ssa_errors;
use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, MiscMethods};
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::mir::mono::CodegenUnit;
use rustc_middle::span_bug;
Expand Down Expand Up @@ -621,7 +622,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> {
let mut name = String::with_capacity(prefix.len() + 6);
name.push_str(prefix);
name.push_str(".");
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
name
}
}
Expand Down
5 changes: 3 additions & 2 deletions compiler/rustc_codegen_llvm/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ use crate::value::Value;
use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
use rustc_codegen_ssa::errors as ssa_errors;
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::small_c_str::SmallCStr;
use rustc_hir::def_id::DefId;
Expand Down Expand Up @@ -1015,7 +1016,7 @@ impl CodegenCx<'_, '_> {
let mut name = String::with_capacity(prefix.len() + 6);
name.push_str(prefix);
name.push('.');
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
name
}
}
Expand Down
114 changes: 90 additions & 24 deletions compiler/rustc_data_structures/src/base_n.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/// Converts unsigned integers into a string representation with some base.
/// Bases up to and including 36 can be used for case-insensitive things.
use std::str;
use std::ascii;
use std::fmt;

#[cfg(test)]
mod tests;
Expand All @@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64;
pub const ALPHANUMERIC_ONLY: usize = 62;
pub const CASE_INSENSITIVE: usize = 36;

const BASE_64: &[u8; MAX_BASE] =
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
const BASE_64: [ascii::Char; MAX_BASE] = {
let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
let Some(ascii) = bytes.as_ascii() else { panic!() };
*ascii
};

#[inline]
pub fn push_str(mut n: u128, base: usize, output: &mut String) {
debug_assert!(base >= 2 && base <= MAX_BASE);
let mut s = [0u8; 128];
let mut index = s.len();
pub struct BaseNString {
start: usize,
buf: [ascii::Char; 128],
}

impl std::ops::Deref for BaseNString {
type Target = str;

let base = base as u128;
fn deref(&self) -> &str {
self.buf[self.start..].as_str()
}
}

impl AsRef<str> for BaseNString {
fn as_ref(&self) -> &str {
self.buf[self.start..].as_str()
}
}

impl fmt::Display for BaseNString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self)
}
}

// This trait just lets us reserve the exact right amount of space when doing fixed-length
// case-insensitve encoding. Add any impls you need.
pub trait ToBaseN: Into<u128> {
fn encoded_len(base: usize) -> usize;

fn to_base_fixed_len(self, base: usize) -> BaseNString {
let mut encoded = self.to_base(base);
encoded.start = encoded.buf.len() - Self::encoded_len(base);
encoded
}

loop {
index -= 1;
s[index] = BASE_64[(n % base) as usize];
n /= base;
fn to_base(self, base: usize) -> BaseNString {
let mut output = [ascii::Char::Digit0; 128];

if n == 0 {
break;
let mut n: u128 = self.into();

let mut index = output.len();
loop {
index -= 1;
output[index] = BASE_64[(n % base as u128) as usize];
n /= base as u128;

if n == 0 {
break;
}
}
assert_eq!(n, 0);

BaseNString { start: index, buf: output }
}
}

impl ToBaseN for u128 {
fn encoded_len(base: usize) -> usize {
let mut max = u128::MAX;
let mut len = 0;
while max > 0 {
len += 1;
max /= base as u128;
}
len
}
}

output.push_str(unsafe {
// SAFETY: `s` is populated using only valid utf8 characters from `BASE_64`
str::from_utf8_unchecked(&s[index..])
});
impl ToBaseN for u64 {
fn encoded_len(base: usize) -> usize {
let mut max = u64::MAX;
let mut len = 0;
while max > 0 {
len += 1;
max /= base as u64;
}
len
}
}

#[inline]
pub fn encode(n: u128, base: usize) -> String {
let mut s = String::new();
push_str(n, base, &mut s);
s
impl ToBaseN for u32 {
fn encoded_len(base: usize) -> usize {
let mut max = u32::MAX;
let mut len = 0;
while max > 0 {
len += 1;
max /= base as u32;
}
len
}
}
12 changes: 10 additions & 2 deletions compiler/rustc_data_structures/src/base_n/tests.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
use super::*;

#[test]
fn test_encode() {
fn limits() {
assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36));
assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36));
assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36));
}

#[test]
fn test_to_base() {
fn test(n: u128, base: usize) {
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32));
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32));
}

for base in 2..37 {
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_data_structures/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#![doc(rust_logo)]
#![feature(allocator_api)]
#![feature(array_windows)]
#![feature(ascii_char)]
#![feature(ascii_char_variants)]
#![feature(auto_traits)]
#![feature(cfg_match)]
#![feature(core_intrinsics)]
Expand Down
54 changes: 25 additions & 29 deletions compiler/rustc_incremental/src/persist/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,14 @@
//! implemented.

use crate::errors;
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::flock;
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
use rustc_data_structures::svh::Svh;
use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_data_structures::{base_n, flock};
use rustc_errors::ErrorGuaranteed;
use rustc_fs_util::{link_or_copy, try_canonicalize, LinkOrCopy};
use rustc_session::config::CrateType;
Expand Down Expand Up @@ -332,31 +336,24 @@ pub fn finalize_session_directory(sess: &Session, svh: Option<Svh>) {

debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());

let old_sub_dir_name = incr_comp_session_dir
let mut sub_dir_name = incr_comp_session_dir
.file_name()
.unwrap()
.to_str()
.expect("malformed session dir name: contains non-Unicode characters");
.expect("malformed session dir name: contains non-Unicode characters")
.to_string();

// Keep the 's-{timestamp}-{random-number}' prefix, but replace the
// '-working' part with the SVH of the crate
let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect();
if dash_indices.len() != 3 {
bug!(
"Encountered incremental compilation session directory with \
malformed name: {}",
incr_comp_session_dir.display()
)
}

// State: "s-{timestamp}-{random-number}-"
let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
// Keep the 's-{timestamp}-{random-number}' prefix, but replace "working" with the SVH of the crate
sub_dir_name.truncate(sub_dir_name.len() - "working".len());
// Double-check that we kept this: "s-{timestamp}-{random-number}-"
assert!(sub_dir_name.ends_with('-'), "{:?}", sub_dir_name);
assert!(sub_dir_name.as_bytes().iter().filter(|b| **b == b'-').count() == 3);

// Append the svh
base_n::push_str(svh.as_u128(), INT_ENCODE_BASE, &mut new_sub_dir_name);
// Append the SVH
sub_dir_name.push_str(&svh.as_u128().to_base_fixed_len(CASE_INSENSITIVE));

// Create the full path
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
let new_path = incr_comp_session_dir.parent().unwrap().join(&*sub_dir_name);
debug!("finalize_session_directory() - new path: {}", new_path.display());

match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) {
Expand Down Expand Up @@ -452,11 +449,11 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
let random_number = thread_rng().next_u32();
debug!("generate_session_dir_path: random_number = {}", random_number);

let directory_name = format!(
"s-{}-{}-working",
timestamp,
base_n::encode(random_number as u128, INT_ENCODE_BASE)
);
// Chop the first 3 characters off the timestamp. Those 3 bytes will be zero for a while.
let (zeroes, timestamp) = timestamp.split_at(3);
assert_eq!(zeroes, "000");
let directory_name =
format!("s-{}-{}-working", timestamp, random_number.to_base_fixed_len(CASE_INSENSITIVE));
debug!("generate_session_dir_path: directory_name = {}", directory_name);
let directory_path = crate_dir.join(directory_name);
debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
Expand Down Expand Up @@ -587,10 +584,10 @@ fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime
string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]])
}

fn timestamp_to_string(timestamp: SystemTime) -> String {
fn timestamp_to_string(timestamp: SystemTime) -> BaseNString {
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
base_n::encode(micros as u128, INT_ENCODE_BASE)
micros.to_base_fixed_len(CASE_INSENSITIVE)
}

fn string_to_timestamp(s: &str) -> Result<SystemTime, &'static str> {
Expand Down Expand Up @@ -621,9 +618,8 @@ fn crate_path(sess: &Session) -> PathBuf {
sess.cfg_version,
);

let stable_crate_id = base_n::encode(stable_crate_id.as_u64() as u128, INT_ENCODE_BASE);

let crate_name = format!("{crate_name}-{stable_crate_id}");
let crate_name =
format!("{crate_name}-{}", stable_crate_id.as_u64().to_base_fixed_len(CASE_INSENSITIVE));
incr_dir.join(crate_name)
}

Expand Down
11 changes: 5 additions & 6 deletions compiler/rustc_middle/src/mir/mono.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
use crate::ty::{GenericArgs, Instance, InstanceDef, SymbolName, TyCtxt};
use rustc_attr::InlineAttr;
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::FxIndexMap;
Expand Down Expand Up @@ -334,14 +336,11 @@ impl<'tcx> CodegenUnit<'tcx> {
self.is_code_coverage_dead_code_cgu = true;
}

pub fn mangle_name(human_readable_name: &str) -> String {
// We generate a 80 bit hash from the name. This should be enough to
// avoid collisions and is still reasonably short for filenames.
pub fn mangle_name(human_readable_name: &str) -> BaseNString {
let mut hasher = StableHasher::new();
human_readable_name.hash(&mut hasher);
let hash: Hash128 = hasher.finish();
let hash = hash.as_u128() & ((1u128 << 80) - 1);
base_n::encode(hash, base_n::CASE_INSENSITIVE)
hash.as_u128().to_base_fixed_len(CASE_INSENSITIVE)
}

pub fn compute_size_estimate(&mut self) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
//!
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
//! see design document in the tracking issue #89653.
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::fx::FxHashMap;
use rustc_hir as hir;
use rustc_middle::bug;
Expand Down Expand Up @@ -736,7 +738,7 @@ fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String {
/// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>).
fn to_disambiguator(num: u64) -> String {
if let Some(num) = num.checked_sub(1) {
format!("s{}_", base_n::encode(num as u128, base_n::ALPHANUMERIC_ONLY))
format!("s{}_", num.to_base(ALPHANUMERIC_ONLY))
} else {
"s_".to_string()
}
Expand All @@ -746,7 +748,7 @@ fn to_disambiguator(num: u64) -> String {
/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>).
fn to_seq_id(num: usize) -> String {
if let Some(num) = num.checked_sub(1) {
base_n::encode(num as u128, base_n::CASE_INSENSITIVE).to_uppercase()
(num as u64).to_base(CASE_INSENSITIVE).to_uppercase()
} else {
"".to_string()
}
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_symbol_mangling/src/v0.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use rustc_data_structures::base_n;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::intern::Interned;
use rustc_hir as hir;
Expand Down Expand Up @@ -831,7 +831,7 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
if let Some(x) = x.checked_sub(1) {
base_n::push_str(x as u128, base_n::ALPHANUMERIC_ONLY, output);
output.push_str(&x.to_base(62));
}
output.push('_');
}
Expand Down
Loading