Skip to content

Commit

Permalink
Auto merge of #98851 - klensy:encode_symbols, r=cjgillot
Browse files Browse the repository at this point in the history
rustc_metadata: dedupe strings to prevent multiple copies in rmeta/query cache from blowing up file size

r? `@cjgillot`

Encodes strings in the rmeta/query cache so that duplicated strings are encoded as offsets to their first occurrence, reducing file size.
  • Loading branch information
bors committed Aug 18, 2022
2 parents 0b79f75 + adba469 commit 71ecf5d
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 3 deletions.
29 changes: 29 additions & 0 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,35 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Span {
}
}

/// Decodes a `Symbol` from crate metadata.
///
/// The encoding starts with a one-byte tag:
/// * `SYMBOL_STR`: the string follows the tag inline and is interned directly.
/// * `SYMBOL_OFFSET`: a `usize` position follows the tag; the string is read
///   from that position and the decoder is then moved back to where it was.
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Symbol {
    fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Self {
        match d.read_u8() {
            SYMBOL_STR => Symbol::intern(d.read_str()),
            SYMBOL_OFFSET => {
                // Position of the string this entry refers to.
                let str_pos = d.read_usize();
                // Remember where decoding has to continue afterwards.
                let resume_pos = d.opaque.position();

                // Jump to the string, read and intern it.
                d.opaque.set_position(str_pos);
                let interned = Symbol::intern(d.read_str());

                // Jump back so the caller continues right after the offset.
                d.opaque.set_position(resume_pos);

                interned
            }
            // No other tag is handled here.
            _ => unreachable!(),
        }
    }
}

impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for &'tcx [ty::abstract_const::Node<'tcx>] {
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Self {
ty::codec::RefDecodable::decode(d)
Expand Down
21 changes: 21 additions & 0 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use rustc_span::{
};
use rustc_target::abi::VariantIdx;
use std::borrow::Borrow;
use std::collections::hash_map::Entry;
use std::hash::Hash;
use std::io::{Read, Seek, Write};
use std::iter;
Expand Down Expand Up @@ -75,6 +76,7 @@ pub(super) struct EncodeContext<'a, 'tcx> {
required_source_files: Option<GrowableBitSet<usize>>,
is_proc_macro: bool,
hygiene_ctxt: &'a HygieneEncodeContext,
symbol_table: FxHashMap<Symbol, usize>,
}

/// If the current crate is a proc-macro, returns early with `Lazy:empty()`.
Expand Down Expand Up @@ -307,6 +309,24 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
}
}

/// Encodes a `Symbol` into crate metadata, writing each distinct string once.
///
/// The first time a symbol is seen, `SYMBOL_STR` is emitted followed by the
/// string itself, and the position right after the tag (where the string
/// starts) is recorded in `symbol_table`. Every later occurrence emits
/// `SYMBOL_OFFSET` followed by that recorded position instead of the string.
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Symbol {
    fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
        match s.symbol_table.entry(*self) {
            Entry::Occupied(seen) => {
                // Already written: refer back to the recorded position.
                let first_pos = *seen.get();
                s.emit_u8(SYMBOL_OFFSET);
                s.emit_usize(first_pos);
            }
            Entry::Vacant(slot) => {
                s.opaque.emit_u8(SYMBOL_STR);
                // Record the position *after* the tag, i.e. where the string begins.
                let str_pos = s.opaque.position();
                slot.insert(str_pos);
                s.emit_str(self.as_str());
            }
        }
    }
}

impl<'a, 'tcx> TyEncoder for EncodeContext<'a, 'tcx> {
const CLEAR_CROSS_CRATE: bool = true;

Expand Down Expand Up @@ -2259,6 +2279,7 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>, path: &Path) {
required_source_files,
is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
hygiene_ctxt: &hygiene_ctxt,
symbol_table: Default::default(),
};

// Encode the rustc version string in a predictable location.
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,10 @@ const TAG_VALID_SPAN_LOCAL: u8 = 0;
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
const TAG_PARTIAL_SPAN: u8 = 2;

// Tags for encoding `Symbol`s.
// `SYMBOL_STR`: the symbol's string follows the tag inline.
const SYMBOL_STR: u8 = 0;
// `SYMBOL_OFFSET`: a `usize` position of an already-encoded string follows the tag.
const SYMBOL_OFFSET: u8 = 1;

pub fn provide(providers: &mut Providers) {
encoder::provide(providers);
decoder::provide(providers);
Expand Down
58 changes: 57 additions & 1 deletion compiler/rustc_query_impl/src/on_disk_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ use rustc_span::hygiene::{
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
};
use rustc_span::source_map::{SourceMap, StableSourceFileId};
use rustc_span::CachingSourceMapView;
use rustc_span::{BytePos, ExpnData, ExpnHash, Pos, SourceFile, Span};
use rustc_span::{CachingSourceMapView, Symbol};
use std::collections::hash_map::Entry;
use std::io;
use std::mem;

Expand All @@ -38,6 +39,10 @@ const TAG_RELATIVE_SPAN: u8 = 2;
const TAG_SYNTAX_CONTEXT: u8 = 0;
const TAG_EXPN_DATA: u8 = 1;

// Tags for encoding `Symbol`s.
// `SYMBOL_STR`: the symbol's string follows the tag inline.
const SYMBOL_STR: u8 = 0;
// `SYMBOL_OFFSET`: a `usize` position of an already-encoded string follows the tag.
const SYMBOL_OFFSET: u8 = 1;

/// Provides an interface to incremental compilation data cached from the
/// previous compilation session. This data will eventually include the results
/// of a few selected queries (like `typeck` and `mir_optimized`) and
Expand Down Expand Up @@ -254,6 +259,7 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
source_map: CachingSourceMapView::new(tcx.sess.source_map()),
file_to_file_index,
hygiene_context: &hygiene_encode_context,
symbol_table: Default::default(),
};

// Encode query results.
Expand Down Expand Up @@ -714,6 +720,36 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Span {
}
}

// Copied from the `Symbol` decoding impl in rustc_metadata.
//
// The encoding starts with a one-byte tag: `SYMBOL_STR` means the string
// follows inline; `SYMBOL_OFFSET` means a `usize` position follows, and the
// string is read from that position before the decoder is moved back.
impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol {
    fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
        match d.read_u8() {
            SYMBOL_STR => Symbol::intern(d.read_str()),
            SYMBOL_OFFSET => {
                // Position of the string this entry refers to.
                let target = d.read_usize();
                // Where decoding has to continue once the string is read.
                let saved = d.opaque.position();

                // Jump to the string, read and intern it.
                d.opaque.set_position(target);
                let symbol = Symbol::intern(d.read_str());

                // Jump back to the saved position.
                d.opaque.set_position(saved);

                symbol
            }
            // No other tag is handled here.
            _ => unreachable!(),
        }
    }
}

impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for CrateNum {
fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self {
let stable_id = StableCrateId::decode(d);
Expand Down Expand Up @@ -815,6 +851,7 @@ pub struct CacheEncoder<'a, 'tcx> {
source_map: CachingSourceMapView<'tcx>,
file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>,
hygiene_context: &'a HygieneEncodeContext,
symbol_table: FxHashMap<Symbol, usize>,
}

impl<'a, 'tcx> CacheEncoder<'a, 'tcx> {
Expand Down Expand Up @@ -899,6 +936,25 @@ impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Span {
}
}

// Copied from the `Symbol` encoding impl in rustc_metadata.
//
// The first occurrence of a symbol is written as `SYMBOL_STR` plus the
// string, and the position right after the tag is recorded in `symbol_table`.
// Later occurrences are written as `SYMBOL_OFFSET` plus that position.
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol {
    fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) {
        match s.symbol_table.entry(*self) {
            Entry::Occupied(known) => {
                // Already written: refer back to the recorded position.
                let recorded = *known.get();
                s.emit_u8(SYMBOL_OFFSET);
                s.emit_usize(recorded);
            }
            Entry::Vacant(vacant) => {
                s.encoder.emit_u8(SYMBOL_STR);
                // Record the position *after* the tag, i.e. where the string begins.
                let start = s.encoder.position();
                vacant.insert(start);
                s.emit_str(self.as_str());
            }
        }
    }
}

impl<'a, 'tcx> TyEncoder for CacheEncoder<'a, 'tcx> {
type I = TyCtxt<'tcx>;
const CLEAR_CROSS_CRATE: bool = false;
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1852,14 +1852,14 @@ impl fmt::Display for Symbol {
}

impl<S: Encoder> Encodable<S> for Symbol {
fn encode(&self, s: &mut S) {
default fn encode(&self, s: &mut S) {
s.emit_str(self.as_str());
}
}

impl<D: Decoder> Decodable<D> for Symbol {
#[inline]
fn decode(d: &mut D) -> Symbol {
default fn decode(d: &mut D) -> Symbol {
Symbol::intern(&d.read_str())
}
}
Expand Down

0 comments on commit 71ecf5d

Please sign in to comment.