author     David Sherret <dsherret@users.noreply.github.com>   2022-07-19 11:58:18 -0400
committer  GitHub <noreply@github.com>                         2022-07-19 11:58:18 -0400
commit     0ab262b901348e9251262a02bef17d14ed13b997 (patch)
tree       fc5a6e3926ea7480714cbc844098eca6c43c1ab5 /cli/cache
parent     e99d64acedb6e111d33f53599da494865978f1aa (diff)
feat: emit files on demand and fix racy emit (#15220)
Diffstat (limited to 'cli/cache')
-rw-r--r--  cli/cache/check.rs        |   6
-rw-r--r--  cli/cache/common.rs       |  35
-rw-r--r--  cli/cache/disk_cache.rs   |  78
-rw-r--r--  cli/cache/emit.rs         | 232
-rw-r--r--  cli/cache/incremental.rs  |  15
-rw-r--r--  cli/cache/mod.rs          |  53
6 files changed, 232 insertions, 187 deletions
diff --git a/cli/cache/check.rs b/cli/cache/check.rs
index 4e0f8d912..6f3c41950 100644
--- a/cli/cache/check.rs
+++ b/cli/cache/check.rs
@@ -22,7 +22,7 @@ impl TypeCheckCache {
       Err(err) => {
         log::debug!(
           concat!(
-            "Failed creating internal type checking cache. ",
+            "Failed loading internal type checking cache. ",
             "Recreating...\n\nError details:\n{:#}",
           ),
           err
@@ -35,7 +35,7 @@ impl TypeCheckCache {
       Err(err) => {
         log::debug!(
           concat!(
-            "Unable to create internal cache for type checking. ",
+            "Unable to load internal cache for type checking. ",
             "This will reduce the performance of type checking.\n\n",
            "Error details:\n{:#}",
          ),
@@ -233,7 +233,7 @@ mod test {
     cache.set_tsbuildinfo(&specifier1, "test");
     assert_eq!(cache.get_tsbuildinfo(&specifier1), Some("test".to_string()));

-    // recreating the cache should not remove the data because the CLI version and state hash is the same
+    // recreating the cache should not remove the data because the CLI version is the same
     let conn = cache.0.unwrap();
     let cache =
       TypeCheckCache::from_connection(conn, "2.0.0".to_string()).unwrap();
diff --git a/cli/cache/common.rs b/cli/cache/common.rs
index c01c1ab9a..b536d6cb2 100644
--- a/cli/cache/common.rs
+++ b/cli/cache/common.rs
@@ -1,16 +1,37 @@
 // Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.

+use std::hash::Hasher;
+
 use deno_core::error::AnyError;
 use deno_runtime::deno_webstorage::rusqlite::Connection;

-/// Very fast non-cryptographically secure hash.
-pub fn fast_insecure_hash(bytes: &[u8]) -> u64 {
-  use std::hash::Hasher;
-  use twox_hash::XxHash64;
+/// A very fast insecure hasher that uses the xxHash algorithm.
+#[derive(Default)]
+pub struct FastInsecureHasher(twox_hash::XxHash64);
+
+impl FastInsecureHasher {
+  pub fn new() -> Self {
+    Self::default()
+  }
+
+  pub fn write_str(&mut self, text: &str) -> &mut Self {
+    self.write(text.as_bytes());
+    self
+  }
+
+  pub fn write(&mut self, bytes: &[u8]) -> &mut Self {
+    self.0.write(bytes);
+    self
+  }
+
+  pub fn write_u64(&mut self, value: u64) -> &mut Self {
+    self.0.write_u64(value);
+    self
+  }

-  let mut hasher = XxHash64::default();
-  hasher.write(bytes);
-  hasher.finish()
+  pub fn finish(&self) -> u64 {
+    self.0.finish()
+  }
 }

 /// Runs the common sqlite pragma.
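Note: the new FastInsecureHasher above replaces the one-shot fast_insecure_hash function with a chainable builder over xxHash64. A minimal usage sketch, assuming code inside the cli crate; the function name and inputs (example_cache_key, source_text, cli_version, config_bytes) are illustrative placeholders, not part of this diff:

use crate::cache::FastInsecureHasher; // re-exported from cli/cache/mod.rs in this change

fn example_cache_key(source_text: &str, cli_version: &str, config_bytes: &[u8]) -> u64 {
  // each write folds more input into the same xxHash64 state;
  // finish() returns the combined 64-bit digest
  FastInsecureHasher::new()
    .write_str(source_text)
    .write_str(cli_version)
    .write(config_bytes)
    .finish()
}

The chainable form matches how incremental.rs and emit.rs call it later in this diff.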
diff --git a/cli/cache/disk_cache.rs b/cli/cache/disk_cache.rs
index 01352c398..5a2f11e3c 100644
--- a/cli/cache/disk_cache.rs
+++ b/cli/cache/disk_cache.rs
@@ -3,13 +3,6 @@
 use crate::fs_util;
 use crate::http_cache::url_to_filename;

-use super::CacheType;
-use super::Cacher;
-use super::EmitMetadata;
-
-use deno_ast::ModuleSpecifier;
-use deno_core::error::AnyError;
-use deno_core::serde_json;
 use deno_core::url::Host;
 use deno_core::url::Url;
 use std::ffi::OsStr;
@@ -154,77 +147,6 @@ impl DiskCache {
     fs_util::atomic_write_file(&path, data, crate::http_cache::CACHE_PERM)
       .map_err(|e| with_io_context(&e, format!("{:#?}", &path)))
   }
-
-  fn get_emit_metadata(
-    &self,
-    specifier: &ModuleSpecifier,
-  ) -> Option<EmitMetadata> {
-    let filename = self.get_cache_filename_with_extension(specifier, "meta")?;
-    let bytes = self.get(&filename).ok()?;
-    serde_json::from_slice(&bytes).ok()
-  }
-
-  fn set_emit_metadata(
-    &self,
-    specifier: &ModuleSpecifier,
-    data: EmitMetadata,
-  ) -> Result<(), AnyError> {
-    let filename = self
-      .get_cache_filename_with_extension(specifier, "meta")
-      .unwrap();
-    let bytes = serde_json::to_vec(&data)?;
-    self.set(&filename, &bytes).map_err(|e| e.into())
-  }
-}
-
-// todo(13302): remove and replace with sqlite database
-impl Cacher for DiskCache {
-  fn get(
-    &self,
-    cache_type: CacheType,
-    specifier: &ModuleSpecifier,
-  ) -> Option<String> {
-    let extension = match cache_type {
-      CacheType::Emit => "js",
-      CacheType::SourceMap => "js.map",
-      CacheType::Version => {
-        return self.get_emit_metadata(specifier).map(|d| d.version_hash)
-      }
-    };
-    let filename =
-      self.get_cache_filename_with_extension(specifier, extension)?;
-    self
-      .get(&filename)
-      .ok()
-      .and_then(|b| String::from_utf8(b).ok())
-  }
-
-  fn set(
-    &self,
-    cache_type: CacheType,
-    specifier: &ModuleSpecifier,
-    value: String,
-  ) -> Result<(), AnyError> {
-    let extension = match cache_type {
-      CacheType::Emit => "js",
-      CacheType::SourceMap => "js.map",
-      CacheType::Version => {
-        let data = if let Some(mut data) = self.get_emit_metadata(specifier) {
-          data.version_hash = value;
-          data
-        } else {
-          EmitMetadata {
-            version_hash: value,
-          }
-        };
-        return self.set_emit_metadata(specifier, data);
-      }
-    };
-    let filename = self
-      .get_cache_filename_with_extension(specifier, extension)
-      .unwrap();
-    self.set(&filename, value.as_bytes()).map_err(|e| e.into())
-  }
 }

 #[cfg(test)]
diff --git a/cli/cache/emit.rs b/cli/cache/emit.rs
index e1469b862..61039a966 100644
--- a/cli/cache/emit.rs
+++ b/cli/cache/emit.rs
@@ -1,71 +1,209 @@
 // Copyright 2018-2022 the Deno authors. All rights reserved. MIT license.

+use std::path::PathBuf;
+
 use deno_ast::ModuleSpecifier;
+use deno_core::anyhow::anyhow;
 use deno_core::error::AnyError;
+use deno_core::serde_json;
+use serde::Deserialize;
+use serde::Serialize;

-use super::CacheType;
-use super::Cacher;
+use super::DiskCache;
+use super::FastInsecureHasher;

-/// Emit cache for a single file.
-#[derive(Debug, Clone, PartialEq)]
-pub struct SpecifierEmitCacheData {
+#[derive(Debug, Deserialize, Serialize)]
+struct EmitMetadata {
   pub source_hash: String,
-  pub text: String,
-  pub map: Option<String>,
+  pub emit_hash: String,
+  // purge the cache between cli versions
+  pub cli_version: String,
 }

-pub trait EmitCache {
-  /// Gets the emit data from the cache.
-  fn get_emit_data(
-    &self,
-    specifier: &ModuleSpecifier,
-  ) -> Option<SpecifierEmitCacheData>;
-  /// Sets the emit data in the cache.
-  fn set_emit_data(
-    &self,
-    specifier: ModuleSpecifier,
-    data: SpecifierEmitCacheData,
-  ) -> Result<(), AnyError>;
-  /// Gets the stored hash of the source of the provider specifier
-  /// to tell if the emit is out of sync with the source.
-  /// TODO(13302): this is actually not reliable and should be removed
-  /// once switching to an sqlite db
-  fn get_source_hash(&self, specifier: &ModuleSpecifier) -> Option<String>;
-  /// Gets the emitted JavaScript of the TypeScript source.
-  /// TODO(13302): remove this once switching to an sqlite db
-  fn get_emit_text(&self, specifier: &ModuleSpecifier) -> Option<String>;
+/// The cache that stores previously emitted files.
+#[derive(Clone)]
+pub struct EmitCache {
+  disk_cache: DiskCache,
+  cli_version: String,
 }

-impl<T: Cacher> EmitCache for T {
-  fn get_emit_data(
+impl EmitCache {
+  pub fn new(disk_cache: DiskCache) -> Self {
+    Self {
+      disk_cache,
+      cli_version: crate::version::deno(),
+    }
+  }
+
+  /// Gets the emitted code with embedded sourcemap from the cache.
+  ///
+  /// The expected source hash is used in order to verify
+  /// that you're getting a value from the cache that is
+  /// for the provided source.
+  ///
+  /// Cached emits from previous CLI releases will not be returned
+  /// or emits that do not match the source.
+  pub fn get_emit_code(
     &self,
     specifier: &ModuleSpecifier,
-  ) -> Option<SpecifierEmitCacheData> {
-    Some(SpecifierEmitCacheData {
-      source_hash: self.get_source_hash(specifier)?,
-      text: self.get_emit_text(specifier)?,
-      map: self.get(CacheType::SourceMap, specifier),
-    })
+    expected_source_hash: u64,
+  ) -> Option<String> {
+    let meta_filename = self.get_meta_filename(specifier)?;
+    let emit_filename = self.get_emit_filename(specifier)?;
+
+    // load and verify the meta data file is for this source and CLI version
+    let bytes = self.disk_cache.get(&meta_filename).ok()?;
+    let meta: EmitMetadata = serde_json::from_slice(&bytes).ok()?;
+    if meta.source_hash != expected_source_hash.to_string()
+      || meta.cli_version != self.cli_version
+    {
+      return None;
+    }
+
+    // load and verify the emit is for the meta data
+    let emit_bytes = self.disk_cache.get(&emit_filename).ok()?;
+    if meta.emit_hash != compute_emit_hash(&emit_bytes) {
+      return None;
+    }
+
+    // everything looks good, return it
+    let emit_text = String::from_utf8(emit_bytes).ok()?;
+    Some(emit_text)
   }

-  fn get_source_hash(&self, specifier: &ModuleSpecifier) -> Option<String> {
-    self.get(CacheType::Version, specifier)
+  /// Gets the filepath which stores the emit.
+  pub fn get_emit_filepath(
+    &self,
+    specifier: &ModuleSpecifier,
+  ) -> Option<PathBuf> {
+    Some(
+      self
+        .disk_cache
+        .location
+        .join(self.get_emit_filename(specifier)?),
+    )
   }

-  fn get_emit_text(&self, specifier: &ModuleSpecifier) -> Option<String> {
-    self.get(CacheType::Emit, specifier)
+  /// Sets the emit code in the cache.
+  pub fn set_emit_code(
+    &self,
+    specifier: &ModuleSpecifier,
+    source_hash: u64,
+    code: &str,
+  ) {
+    if let Err(err) = self.set_emit_code_result(specifier, source_hash, code) {
+      // should never error here, but if it ever does don't fail
+      if cfg!(debug_assertions) {
+        panic!("Error saving emit data ({}): {}", specifier, err);
+      } else {
+        log::debug!("Error saving emit data({}): {}", specifier, err);
+      }
+    }
   }

-  fn set_emit_data(
+  fn set_emit_code_result(
     &self,
-    specifier: ModuleSpecifier,
-    data: SpecifierEmitCacheData,
+    specifier: &ModuleSpecifier,
+    source_hash: u64,
+    code: &str,
   ) -> Result<(), AnyError> {
-    self.set(CacheType::Version, &specifier, data.source_hash)?;
-    self.set(CacheType::Emit, &specifier, data.text)?;
-    if let Some(map) = data.map {
-      self.set(CacheType::SourceMap, &specifier, map)?;
-    }
+    let meta_filename = self
+      .get_meta_filename(specifier)
+      .ok_or_else(|| anyhow!("Could not get meta filename."))?;
+    let emit_filename = self
+      .get_emit_filename(specifier)
+      .ok_or_else(|| anyhow!("Could not get emit filename."))?;
+
+    // save the metadata
+    let metadata = EmitMetadata {
+      cli_version: self.cli_version.to_string(),
+      source_hash: source_hash.to_string(),
+      emit_hash: compute_emit_hash(code.as_bytes()),
+    };
+    self
+      .disk_cache
+      .set(&meta_filename, &serde_json::to_vec(&metadata)?)?;
+
+    // save the emit source
+    self.disk_cache.set(&emit_filename, code.as_bytes())?;
+
     Ok(())
   }
+
+  fn get_meta_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> {
+    self
+      .disk_cache
+      .get_cache_filename_with_extension(specifier, "meta")
+  }
+
+  fn get_emit_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> {
+    self
+      .disk_cache
+      .get_cache_filename_with_extension(specifier, "js")
+  }
+}
+
+fn compute_emit_hash(bytes: &[u8]) -> String {
+  // it's ok to use an insecure hash here because
+  // if someone can change the emit source then they
+  // can also change the version hash
+  FastInsecureHasher::new().write(bytes).finish().to_string()
+}
+
+#[cfg(test)]
+mod test {
+  use test_util::TempDir;
+
+  use super::*;
+
+  #[test]
+  pub fn emit_cache_general_use() {
+    let temp_dir = TempDir::new();
+    let disk_cache = DiskCache::new(temp_dir.path());
+    let cache = EmitCache {
+      disk_cache: disk_cache.clone(),
+      cli_version: "1.0.0".to_string(),
+    };
+
+    let specifier1 =
+      ModuleSpecifier::from_file_path(temp_dir.path().join("file1.ts"))
+        .unwrap();
+    let specifier2 =
+      ModuleSpecifier::from_file_path(temp_dir.path().join("file2.ts"))
+        .unwrap();
+    assert_eq!(cache.get_emit_code(&specifier1, 1), None);
+    let emit_code1 = "text1".to_string();
+    let emit_code2 = "text2".to_string();
+    cache.set_emit_code(&specifier1, 10, &emit_code1);
+    cache.set_emit_code(&specifier2, 2, &emit_code2);
+    // providing the incorrect source hash
+    assert_eq!(cache.get_emit_code(&specifier1, 5), None);
+    // providing the correct source hash
+    assert_eq!(
+      cache.get_emit_code(&specifier1, 10),
+      Some(emit_code1.clone()),
+    );
+    assert_eq!(cache.get_emit_code(&specifier2, 2), Some(emit_code2),);
+
+    // try changing the cli version (should not load previous ones)
+    let cache = EmitCache {
+      disk_cache: disk_cache.clone(),
+      cli_version: "2.0.0".to_string(),
+    };
+    assert_eq!(cache.get_emit_code(&specifier1, 10), None);
+    cache.set_emit_code(&specifier1, 5, &emit_code1);
+
+    // recreating the cache should still load the data because the CLI version is the same
+    let cache = EmitCache {
+      disk_cache,
+      cli_version: "2.0.0".to_string(),
+    };
+    assert_eq!(cache.get_emit_code(&specifier1, 5), Some(emit_code1));
+
+    // adding when already exists should not cause issue
+    let emit_code3 = "asdf".to_string();
+    cache.set_emit_code(&specifier1, 20, &emit_code3);
+    assert_eq!(cache.get_emit_code(&specifier1, 5), None);
+    assert_eq!(cache.get_emit_code(&specifier1, 20), Some(emit_code3));
+  }
 }
diff --git a/cli/cache/incremental.rs b/cli/cache/incremental.rs
index b5fff0734..da832a8b5 100644
--- a/cli/cache/incremental.rs
+++ b/cli/cache/incremental.rs
@@ -12,8 +12,8 @@ use deno_runtime::deno_webstorage::rusqlite::Connection;
 use serde::Serialize;
 use tokio::task::JoinHandle;

-use super::common::fast_insecure_hash;
 use super::common::run_sqlite_pragma;
+use super::common::FastInsecureHasher;

 /// Cache used to skip formatting/linting a file again when we
 /// know it is already formatted or has no lint diagnostics.
@@ -79,8 +79,9 @@ impl IncrementalCacheInner {
     state: &TState,
     initial_file_paths: &[PathBuf],
   ) -> Result<Self, AnyError> {
-    let state_hash =
-      fast_insecure_hash(serde_json::to_string(state).unwrap().as_bytes());
+    let state_hash = FastInsecureHasher::new()
+      .write_str(&serde_json::to_string(state).unwrap())
+      .finish();
     let sql_cache = SqlIncrementalCache::new(db_file_path, state_hash)?;
     Ok(Self::from_sql_incremental_cache(
       sql_cache,
@@ -123,13 +124,15 @@ impl IncrementalCacheInner {

   pub fn is_file_same(&self, file_path: &Path, file_text: &str) -> bool {
     match self.previous_hashes.get(file_path) {
-      Some(hash) => *hash == fast_insecure_hash(file_text.as_bytes()),
+      Some(hash) => {
+        *hash == FastInsecureHasher::new().write_str(file_text).finish()
+      }
       None => false,
     }
   }

   pub fn update_file(&self, file_path: &Path, file_text: &str) {
-    let hash = fast_insecure_hash(file_text.as_bytes());
+    let hash = FastInsecureHasher::new().write_str(file_text).finish();
     if let Some(previous_hash) = self.previous_hashes.get(file_path) {
       if *previous_hash == hash {
         return; // do not bother updating the db file because nothing has changed
@@ -334,7 +337,7 @@ mod test {
       .unwrap();
     let file_path = PathBuf::from("/mod.ts");
     let file_text = "test";
-    let file_hash = fast_insecure_hash(file_text.as_bytes());
+    let file_hash = FastInsecureHasher::new().write_str(file_text).finish();
     sql_cache.set_source_hash(&file_path, file_hash).unwrap();
     let cache = IncrementalCacheInner::from_sql_incremental_cache(
       sql_cache,
diff --git a/cli/cache/mod.rs b/cli/cache/mod.rs
index f363d8fa8..7482826cf 100644
--- a/cli/cache/mod.rs
+++ b/cli/cache/mod.rs
@@ -3,10 +3,7 @@
 use crate::errors::get_error_class_name;
 use crate::file_fetcher::FileFetcher;

-use deno_core::error::AnyError;
 use deno_core::futures::FutureExt;
-use deno_core::serde::Deserialize;
-use deno_core::serde::Serialize;
 use deno_core::ModuleSpecifier;
 use deno_graph::source::CacheInfo;
 use deno_graph::source::LoadFuture;
@@ -22,44 +19,15 @@ mod emit;
 mod incremental;

 pub use check::TypeCheckCache;
+pub use common::FastInsecureHasher;
 pub use disk_cache::DiskCache;
 pub use emit::EmitCache;
-pub use emit::SpecifierEmitCacheData;
 pub use incremental::IncrementalCache;

-#[derive(Debug, Deserialize, Serialize)]
-pub struct EmitMetadata {
-  pub version_hash: String,
-}
-
-pub enum CacheType {
-  Emit,
-  SourceMap,
-  Version,
-}
-
-/// A trait which provides a concise implementation to getting and setting
-/// values in a cache.
-pub trait Cacher {
-  /// Get a value from the cache.
-  fn get(
-    &self,
-    cache_type: CacheType,
-    specifier: &ModuleSpecifier,
-  ) -> Option<String>;
-  /// Set a value in the cache.
-  fn set(
-    &self,
-    cache_type: CacheType,
-    specifier: &ModuleSpecifier,
-    value: String,
-  ) -> Result<(), AnyError>;
-}
-
 /// A "wrapper" for the FileFetcher and DiskCache for the Deno CLI that provides
 /// a concise interface to the DENO_DIR when building module graphs.
 pub struct FetchCacher {
-  disk_cache: DiskCache,
+  emit_cache: EmitCache,
   dynamic_permissions: Permissions,
   file_fetcher: Arc<FileFetcher>,
   root_permissions: Permissions,
@@ -67,7 +35,7 @@ pub struct FetchCacher {

 impl FetchCacher {
   pub fn new(
-    disk_cache: DiskCache,
+    emit_cache: EmitCache,
     file_fetcher: FileFetcher,
     root_permissions: Permissions,
     dynamic_permissions: Permissions,
@@ -75,7 +43,7 @@
     let file_fetcher = Arc::new(file_fetcher);

     Self {
-      disk_cache,
+      emit_cache,
       dynamic_permissions,
       file_fetcher,
       root_permissions,
@@ -87,21 +55,14 @@ impl Loader for FetchCacher {
   fn get_cache_info(&self, specifier: &ModuleSpecifier) -> Option<CacheInfo> {
     let local = self.file_fetcher.get_local_path(specifier)?;
     if local.is_file() {
-      let location = &self.disk_cache.location;
       let emit = self
-        .disk_cache
-        .get_cache_filename_with_extension(specifier, "js")
-        .map(|p| location.join(p))
-        .filter(|p| p.is_file());
-      let map = self
-        .disk_cache
-        .get_cache_filename_with_extension(specifier, "js.map")
-        .map(|p| location.join(p))
+        .emit_cache
+        .get_emit_filepath(specifier)
         .filter(|p| p.is_file());
       Some(CacheInfo {
         local: Some(local),
         emit,
-        map,
+        map: None,
       })
     } else {
       None
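Note: taken together, the new EmitCache replaces the old Cacher/CacheType indirection: a caller hashes the source, asks the cache for a matching emit, and emits on demand only on a miss. A rough sketch of that flow, assuming code inside the cli crate; transpile_source and emit_with_cache are hypothetical placeholders, not part of this commit:

use deno_ast::ModuleSpecifier;

use crate::cache::EmitCache;
use crate::cache::FastInsecureHasher;

fn emit_with_cache(
  cache: &EmitCache,
  specifier: &ModuleSpecifier,
  source_text: &str,
) -> String {
  // key the cached emit to this exact source text
  let source_hash = FastInsecureHasher::new().write_str(source_text).finish();

  // cache hit: the stored emit matches this source and this CLI version
  if let Some(code) = cache.get_emit_code(specifier, source_hash) {
    return code;
  }

  // cache miss: emit on demand and store the result under the source hash
  let code = transpile_source(source_text);
  cache.set_emit_code(specifier, source_hash, &code);
  code
}

fn transpile_source(source_text: &str) -> String {
  // placeholder for the real TypeScript-to-JavaScript emit
  format!("// emitted\n{}", source_text)
}

Because get_emit_code checks the stored source hash, the emit hash, and the CLI version before returning anything, a stale or partially written emit simply falls through to a fresh emit instead of being served.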