diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2024-08-26 11:43:57 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-26 15:43:57 +0000 |
commit | a8ce02473add1a9bb7ddc0ff55767a620a3dc9e1 (patch) | |
tree | eb24bb14c51406c8fba487fc200f21147224bd32 /cli/cache | |
parent | d8dfe6dc97942a2e67c85cb799c08d0c95ed8aee (diff) |
perf(cache): single cache file for typescript emit (#24994)
Diffstat (limited to 'cli/cache')
-rw-r--r-- | cli/cache/emit.rs | 165 |
1 files changed, 92 insertions, 73 deletions
diff --git a/cli/cache/emit.rs b/cli/cache/emit.rs index 757df0b75..5e89f9a90 100644 --- a/cli/cache/emit.rs +++ b/cli/cache/emit.rs @@ -5,33 +5,25 @@ use std::path::PathBuf; use deno_ast::ModuleSpecifier; use deno_core::anyhow::anyhow; use deno_core::error::AnyError; -use deno_core::serde_json; use deno_core::unsync::sync::AtomicFlag; -use serde::Deserialize; -use serde::Serialize; use super::DiskCache; -use super::FastInsecureHasher; - -#[derive(Debug, Deserialize, Serialize)] -struct EmitMetadata { - pub source_hash: u64, - pub emit_hash: u64, -} /// The cache that stores previously emitted files. pub struct EmitCache { disk_cache: DiskCache, - cli_version: &'static str, emit_failed_flag: AtomicFlag, + file_serializer: EmitFileSerializer, } impl EmitCache { pub fn new(disk_cache: DiskCache) -> Self { Self { disk_cache, - cli_version: crate::version::DENO_VERSION_INFO.deno, emit_failed_flag: Default::default(), + file_serializer: EmitFileSerializer { + cli_version: crate::version::DENO_VERSION_INFO.deno, + }, } } @@ -48,37 +40,11 @@ impl EmitCache { specifier: &ModuleSpecifier, expected_source_hash: u64, ) -> Option<Vec<u8>> { - let meta_filename = self.get_meta_filename(specifier)?; let emit_filename = self.get_emit_filename(specifier)?; - - // load and verify the meta data file is for this source and CLI version - let bytes = self.disk_cache.get(&meta_filename).ok()?; - let meta: EmitMetadata = serde_json::from_slice(&bytes).ok()?; - if meta.source_hash != expected_source_hash { - return None; - } - - // load and verify the emit is for the meta data - let emit_bytes = self.disk_cache.get(&emit_filename).ok()?; - if meta.emit_hash != compute_emit_hash(&emit_bytes, self.cli_version) { - return None; - } - - // everything looks good, return it - Some(emit_bytes) - } - - /// Gets the filepath which stores the emit. - pub fn get_emit_filepath( - &self, - specifier: &ModuleSpecifier, - ) -> Option<PathBuf> { - Some( - self - .disk_cache - .location - .join(self.get_emit_filename(specifier)?), - ) + let bytes = self.disk_cache.get(&emit_filename).ok()?; + self + .file_serializer + .deserialize(bytes, expected_source_hash) } /// Sets the emit code in the cache. @@ -107,32 +73,26 @@ impl EmitCache { return Ok(()); } - let meta_filename = self - .get_meta_filename(specifier) - .ok_or_else(|| anyhow!("Could not get meta filename."))?; let emit_filename = self .get_emit_filename(specifier) .ok_or_else(|| anyhow!("Could not get emit filename."))?; - - // save the metadata - let metadata = EmitMetadata { - source_hash, - emit_hash: compute_emit_hash(code, self.cli_version), - }; - self - .disk_cache - .set(&meta_filename, &serde_json::to_vec(&metadata)?)?; - - // save the emit source - self.disk_cache.set(&emit_filename, code)?; + let cache_data = self.file_serializer.serialize(code, source_hash); + self.disk_cache.set(&emit_filename, &cache_data)?; Ok(()) } - fn get_meta_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> { - self - .disk_cache - .get_cache_filename_with_extension(specifier, "meta") + /// Gets the filepath which stores the emit. + pub fn get_emit_filepath( + &self, + specifier: &ModuleSpecifier, + ) -> Option<PathBuf> { + Some( + self + .disk_cache + .location + .join(self.get_emit_filename(specifier)?), + ) } fn get_emit_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> { @@ -142,15 +102,68 @@ impl EmitCache { } } -fn compute_emit_hash(bytes: &[u8], cli_version: &str) -> u64 { - // it's ok to use an insecure hash here because - // if someone can change the emit source then they - // can also change the version hash - FastInsecureHasher::new_without_deno_version() // use cli_version param instead - .write(bytes) - // emit should not be re-used between cli versions - .write_str(cli_version) - .finish() +const LAST_LINE_PREFIX: &str = "\n// denoCacheMetadata="; + +struct EmitFileSerializer { + cli_version: &'static str, +} + +impl EmitFileSerializer { + pub fn deserialize( + &self, + mut bytes: Vec<u8>, + expected_source_hash: u64, + ) -> Option<Vec<u8>> { + let last_newline_index = bytes.iter().rposition(|&b| b == b'\n')?; + let (content, last_line) = bytes.split_at(last_newline_index); + let hashes = last_line.strip_prefix(LAST_LINE_PREFIX.as_bytes())?; + let hashes = String::from_utf8_lossy(hashes); + let (source_hash, emit_hash) = hashes.split_once(',')?; + + // verify the meta data file is for this source and CLI version + let source_hash = source_hash.parse::<u64>().ok()?; + if source_hash != expected_source_hash { + return None; + } + let emit_hash = emit_hash.parse::<u64>().ok()?; + // prevent using an emit from a different cli version or emits that were tampered with + if emit_hash != self.compute_emit_hash(content) { + return None; + } + + // everything looks good, truncate and return it + bytes.truncate(content.len()); + Some(bytes) + } + + pub fn serialize(&self, code: &[u8], source_hash: u64) -> Vec<u8> { + let source_hash = source_hash.to_string(); + let emit_hash = self.compute_emit_hash(code).to_string(); + let capacity = code.len() + + LAST_LINE_PREFIX.len() + + source_hash.len() + + 1 + + emit_hash.len(); + let mut cache_data = Vec::with_capacity(capacity); + cache_data.extend(code); + cache_data.extend(LAST_LINE_PREFIX.as_bytes()); + cache_data.extend(source_hash.as_bytes()); + cache_data.push(b','); + cache_data.extend(emit_hash.as_bytes()); + debug_assert_eq!(cache_data.len(), capacity); + cache_data + } + + fn compute_emit_hash(&self, bytes: &[u8]) -> u64 { + // it's ok to use an insecure hash here because + // if someone can change the emit source then they + // can also change the version hash + crate::cache::FastInsecureHasher::new_without_deno_version() // use cli_version property instead + .write(bytes) + // emit should not be re-used between cli versions + .write_str(self.cli_version) + .finish() + } } #[cfg(test)] @@ -165,7 +178,9 @@ mod test { let disk_cache = DiskCache::new(temp_dir.path().as_path()); let cache = EmitCache { disk_cache: disk_cache.clone(), - cli_version: "1.0.0", + file_serializer: EmitFileSerializer { + cli_version: "1.0.0", + }, emit_failed_flag: Default::default(), }; let to_string = @@ -197,7 +212,9 @@ mod test { // try changing the cli version (should not load previous ones) let cache = EmitCache { disk_cache: disk_cache.clone(), - cli_version: "2.0.0", + file_serializer: EmitFileSerializer { + cli_version: "2.0.0", + }, emit_failed_flag: Default::default(), }; assert_eq!(cache.get_emit_code(&specifier1, 10), None); @@ -206,7 +223,9 @@ mod test { // recreating the cache should still load the data because the CLI version is the same let cache = EmitCache { disk_cache, - cli_version: "2.0.0", + file_serializer: EmitFileSerializer { + cli_version: "2.0.0", + }, emit_failed_flag: Default::default(), }; assert_eq!( |