Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Cache passed type checks for many configs #15090

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cli/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,22 @@ use deno_graph::source::LoadFuture;
use deno_graph::source::LoadResponse;
use deno_graph::source::Loader;
use deno_runtime::permissions::Permissions;
use std::collections::HashSet;
use std::sync::Arc;

#[derive(Debug, Deserialize, Serialize)]
pub struct EmitMetadata {
pub version_hash: String,
pub type_checks_succeeded: HashSet<String>,
nayeemrmn marked this conversation as resolved.
Show resolved Hide resolved
nayeemrmn marked this conversation as resolved.
Show resolved Hide resolved
}

pub enum CacheType {
Emit,
SourceMap,
TypeScriptBuildInfo,
Version,
GetTypeCheckSucceeded(String),
SetTypeCheckSucceeded,
}

/// A trait which provides a concise implementation to getting and setting
Expand Down
25 changes: 25 additions & 0 deletions cli/disk_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,17 @@ impl Cacher for DiskCache {
CacheType::Version => {
return self.get_emit_metadata(specifier).map(|d| d.version_hash)
}
// TODO(nayeemrmn): This returns `Some("")`/`None` to represent a boolean
// because `Cacher::get()` is constrained that way. Clean up.
CacheType::GetTypeCheckSucceeded(config_hash) => {
if let Some(metadata) = self.get_emit_metadata(specifier) {
if metadata.type_checks_succeeded.contains(&config_hash) {
return Some("".to_string());
}
}
return None;
}
CacheType::SetTypeCheckSucceeded => panic!("Set only"),
};
let filename =
self.get_cache_filename_with_extension(specifier, extension)?;
Expand All @@ -209,15 +220,29 @@ impl Cacher for DiskCache {
CacheType::TypeScriptBuildInfo => "buildinfo",
CacheType::Version => {
let data = if let Some(mut data) = self.get_emit_metadata(specifier) {
if value != data.version_hash {
data.type_checks_succeeded.clear();
}
data.version_hash = value;
data
} else {
EmitMetadata {
version_hash: value,
type_checks_succeeded: Default::default(),
}
};
return self.set_emit_metadata(specifier, data);
}
CacheType::GetTypeCheckSucceeded(_) => panic!("Get only"),
CacheType::SetTypeCheckSucceeded => {
let mut data = self
.get_emit_metadata(specifier)
.expect("Must set emit before TypeCheckSucceeded");
if data.type_checks_succeeded.insert(value) {
self.set_emit_metadata(specifier, data)?;
}
return Ok(());
}
};
let filename = self
.get_cache_filename_with_extension(specifier, extension)
Expand Down
125 changes: 96 additions & 29 deletions cli/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,23 @@ pub trait EmitCache {
/// Sets the emit data in the cache.
fn set_emit_data(
&self,
specifier: ModuleSpecifier,
specifier: &ModuleSpecifier,
data: SpecifierEmitCacheData,
) -> Result<(), AnyError>;
/// Check if a type check with the given config succeeded with this module
/// as the entrypoint. Use `hash_check_config()` to hash the config.
fn get_type_check_succeeded(
&self,
specifier: &ModuleSpecifier,
config_hash: &str,
) -> bool;
/// Mark that a type check with the given config succeeded with this module
/// as the entrypoint. Use `hash_check_config()` to hash the config.
fn set_type_check_succeeded(
&self,
specifier: &ModuleSpecifier,
config_hash: String,
) -> Result<(), AnyError>;
/// Gets the .tsbuildinfo file from the cache.
fn get_tsbuildinfo(&self, specifier: &ModuleSpecifier) -> Option<String>;
/// Sets the .tsbuildinfo file in the cache.
Expand Down Expand Up @@ -102,17 +116,38 @@ impl<T: Cacher> EmitCache for T {

fn set_emit_data(
&self,
specifier: ModuleSpecifier,
specifier: &ModuleSpecifier,
data: SpecifierEmitCacheData,
) -> Result<(), AnyError> {
self.set(CacheType::Version, &specifier, data.source_hash)?;
self.set(CacheType::Emit, &specifier, data.text)?;
self.set(CacheType::Version, specifier, data.source_hash)?;
self.set(CacheType::Emit, specifier, data.text)?;
if let Some(map) = data.map {
self.set(CacheType::SourceMap, &specifier, map)?;
self.set(CacheType::SourceMap, specifier, map)?;
}
Ok(())
}

fn get_type_check_succeeded(
&self,
specifier: &ModuleSpecifier,
config_hash: &str,
) -> bool {
self
.get(
CacheType::GetTypeCheckSucceeded(config_hash.to_string()),
specifier,
)
.is_some()
}

fn set_type_check_succeeded(
&self,
specifier: &ModuleSpecifier,
config_hash: String,
) -> Result<(), AnyError> {
self.set(CacheType::SetTypeCheckSucceeded, specifier, config_hash)
}

fn get_tsbuildinfo(&self, specifier: &ModuleSpecifier) -> Option<String> {
self.get(CacheType::TypeScriptBuildInfo, specifier)
}
Expand All @@ -126,6 +161,33 @@ impl<T: Cacher> EmitCache for T {
}
}

/// Hash the set of options which can affect the result of a type check on a
/// given module graph. This should be stored in the cached metadata for each
/// root of the graph.
///
/// The config also includes the exact set of roots used. For example, if two
/// type checks are performed and the second uses a subset of the same roots,
/// it's possible that the first passes and the second fails because of missing
/// global augmentations. But without including the roots in the config hash
/// the second would pass the other preliminary checks and short-circuit.
fn hash_check_config(
ts_config: &TsConfig,
mode: TypeCheckMode,
roots: &[(ModuleSpecifier, ModuleKind)],
) -> String {
let mut input = vec![ts_config.as_bytes(), vec![mode as u8]];
nayeemrmn marked this conversation as resolved.
Show resolved Hide resolved
let mut roots = roots
.iter()
.map(|(s, k)| (s.as_str(), serde_json::to_string(k).unwrap()))
.collect::<Vec<_>>();
roots.sort();
for (specifier, kind) in roots {
input.push(specifier.as_bytes().to_vec());
input.push(kind.as_bytes().to_vec());
}
crate::checksum::gen(&input)
}

/// A structure representing stats from an emit operation for a graph.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Stats(pub Vec<(String, u32)>);
Expand Down Expand Up @@ -339,15 +401,11 @@ fn get_tsc_roots(
}
}

/// A hashing function that takes the source code, version and optionally a
/// user provided config and generates a string hash which can be stored to
/// determine if the cached emit is valid or not.
fn get_version(source_bytes: &[u8], config_bytes: &[u8]) -> String {
crate::checksum::gen(&[
source_bytes,
version::deno().as_bytes(),
config_bytes,
])
/// A hashing function that takes the source code and runtime version to
/// generates a string hash which can be stored to determine if the cached emit
/// is valid or not.
fn get_version(source_bytes: &[u8]) -> String {
crate::checksum::gen(&[source_bytes, version::deno().as_bytes()])
}

/// Determine if a given module kind and media type is emittable or not.
Expand Down Expand Up @@ -421,9 +479,11 @@ pub fn check_and_maybe_emit(
graph_data.graph_segment(roots).unwrap()
};
if valid_emit(
roots,
&segment_graph_data,
cache,
&options.ts_config,
options.type_check_mode,
options.reload,
&options.reload_exclusions,
) {
Expand Down Expand Up @@ -454,10 +514,9 @@ pub fn check_and_maybe_emit(
options.ts_config.as_bytes(),
version::deno().as_bytes().to_owned(),
];
let config_bytes = options.ts_config.as_bytes();

let response = tsc::exec(tsc::Request {
config: options.ts_config,
config: options.ts_config.clone(),
debug: options.debug,
graph_data: graph_data.clone(),
hash_data,
Expand Down Expand Up @@ -548,7 +607,7 @@ pub fn check_and_maybe_emit(
let mut emit_data_item = emit_data_items
.entry(specifier.clone())
.or_insert_with(|| SpecifierEmitData {
version_hash: get_version(source_bytes, &config_bytes),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this possibly break stuff like if someone changes their JSX settings? I feel like we should keep config_bytes here.

Copy link
Collaborator Author

@nayeemrmn nayeemrmn Jul 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this doesn't factor in config changes which affect emit... but putting that back will break the added behaviour. I mulled over a "correct" structure which looks like this:

#[derive(Debug, Deserialize, Serialize)]
pub struct EmitMetadata {
  #[serde(default)]
  pub source_hash: String,
  /// Hashed config used for the currently stored emit. Only considers options
  /// which affect how code is emitted.
  #[serde(default)]
  pub emit_config_hash: String,
  /// List of hashed configs for successful type checks. Only considers options
  /// which affect the result of a type check.
  #[serde(default)]
  pub check_config_hashes: HashSet<String>,
}

and reached the conclusion that this can be nicely addressed only after we move to full swc emit and decouple emitting and type-checking.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes caching a lot more complicated and I'm still not sure it's worth it. It seems much easier and less error prone to just invalidate all the caches whenever the config changes. How often do people have multiple configs or no config and then a config when type checking? Is this actually something users need?

Copy link
Collaborator Author

@nayeemrmn nayeemrmn Jul 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It happens with workers also, although that's only with deno run --check now.

version_hash: get_version(source_bytes),
text: None,
map: None,
});
Expand All @@ -569,9 +628,12 @@ pub fn check_and_maybe_emit(
}

// now insert these items into the cache
let config_hash =
hash_check_config(&options.ts_config, options.type_check_mode, roots);
for (specifier, data) in emit_data_items.into_iter() {
if let Some(cache_data) = data.into_cache_data() {
cache.set_emit_data(specifier, cache_data)?;
cache.set_emit_data(&specifier, cache_data)?;
cache.set_type_check_succeeded(&specifier, config_hash.clone())?;
}
}
}
Expand All @@ -597,7 +659,6 @@ pub fn emit(
options: EmitOptions,
) -> Result<CheckEmitResult, AnyError> {
let start = Instant::now();
let config_bytes = options.ts_config.as_bytes();
let include_js = options.ts_config.get_check_js();
let emit_options = options.ts_config.into();

Expand All @@ -610,10 +671,8 @@ pub fn emit(
}
let needs_reload =
options.reload && !options.reload_exclusions.contains(&module.specifier);
let version = get_version(
module.maybe_source.as_ref().map(|s| s.as_bytes()).unwrap(),
&config_bytes,
);
let version =
get_version(module.maybe_source.as_ref().map(|s| s.as_bytes()).unwrap());
let is_valid = cache
.get_source_hash(&module.specifier)
.map_or(false, |v| v == version);
Expand All @@ -627,7 +686,7 @@ pub fn emit(
.unwrap()?;
emit_count += 1;
cache.set_emit_data(
module.specifier.clone(),
&module.specifier,
SpecifierEmitCacheData {
source_hash: version,
text: transpiled_source.text,
Expand All @@ -653,13 +712,16 @@ pub fn emit(
/// graph are emittable and for those that are emittable, if there is currently
/// a valid emit in the cache.
fn valid_emit(
roots: &[(ModuleSpecifier, ModuleKind)],
graph_data: &GraphData,
cache: &dyn EmitCache,
ts_config: &TsConfig,
mode: TypeCheckMode,
reload: bool,
reload_exclusions: &HashSet<ModuleSpecifier>,
) -> bool {
let config_bytes = ts_config.as_bytes();
let config_hash = hash_check_config(ts_config, mode, roots);
let roots = roots.iter().map(|e| e.0.clone()).collect::<HashSet<_>>();
let check_js = ts_config.get_check_js();
for (specifier, module_entry) in graph_data.entries() {
if let ModuleEntry::Module {
Expand All @@ -673,6 +735,9 @@ fn valid_emit(
MediaType::TypeScript
| MediaType::Mts
| MediaType::Cts
| MediaType::Dts
| MediaType::Dmts
| MediaType::Dcts
| MediaType::Tsx
| MediaType::Jsx => {}
MediaType::JavaScript | MediaType::Mjs | MediaType::Cjs => {
Expand All @@ -683,22 +748,24 @@ fn valid_emit(
MediaType::Json
| MediaType::TsBuildInfo
| MediaType::SourceMap
| MediaType::Dts
| MediaType::Dmts
| MediaType::Dcts
| MediaType::Wasm
| MediaType::Unknown => continue,
}
if reload && !reload_exclusions.contains(specifier) {
return false;
}
if let Some(source_hash) = cache.get_source_hash(specifier) {
if source_hash != get_version(code.as_bytes(), &config_bytes) {
if source_hash != get_version(code.as_bytes()) {
return false;
}
} else {
return false;
}
if roots.contains(specifier)
&& !cache.get_type_check_succeeded(specifier, &config_hash)
{
return false;
}
}
}
true
Expand Down