summary | refs | log | tree | commit | diff
path: root/cli/cache
diff options
context:
space:
mode:
author: David Sherret <dsherret@users.noreply.github.com>, 2024-08-26 11:43:57 -0400
committer: GitHub <noreply@github.com>, 2024-08-26 15:43:57 +0000
commit: a8ce02473add1a9bb7ddc0ff55767a620a3dc9e1 (patch)
tree: eb24bb14c51406c8fba487fc200f21147224bd32 /cli/cache
parent: d8dfe6dc97942a2e67c85cb799c08d0c95ed8aee (diff)
perf(cache): single cache file for typescript emit (#24994)
Diffstat (limited to 'cli/cache')
-rw-r--r-- cli/cache/emit.rs 165
1 files changed, 92 insertions, 73 deletions
diff --git a/cli/cache/emit.rs b/cli/cache/emit.rs
index 757df0b75..5e89f9a90 100644
--- a/cli/cache/emit.rs
+++ b/cli/cache/emit.rs
@@ -5,33 +5,25 @@ use std::path::PathBuf;
use deno_ast::ModuleSpecifier;
use deno_core::anyhow::anyhow;
use deno_core::error::AnyError;
-use deno_core::serde_json;
use deno_core::unsync::sync::AtomicFlag;
-use serde::Deserialize;
-use serde::Serialize;
use super::DiskCache;
-use super::FastInsecureHasher;
-
-#[derive(Debug, Deserialize, Serialize)]
-struct EmitMetadata {
- pub source_hash: u64,
- pub emit_hash: u64,
-}
/// The cache that stores previously emitted files.
pub struct EmitCache {
disk_cache: DiskCache,
- cli_version: &'static str,
emit_failed_flag: AtomicFlag,
+ file_serializer: EmitFileSerializer,
}
impl EmitCache {
pub fn new(disk_cache: DiskCache) -> Self {
Self {
disk_cache,
- cli_version: crate::version::DENO_VERSION_INFO.deno,
emit_failed_flag: Default::default(),
+ file_serializer: EmitFileSerializer {
+ cli_version: crate::version::DENO_VERSION_INFO.deno,
+ },
}
}
@@ -48,37 +40,11 @@ impl EmitCache {
specifier: &ModuleSpecifier,
expected_source_hash: u64,
) -> Option<Vec<u8>> {
- let meta_filename = self.get_meta_filename(specifier)?;
let emit_filename = self.get_emit_filename(specifier)?;
-
- // load and verify the meta data file is for this source and CLI version
- let bytes = self.disk_cache.get(&meta_filename).ok()?;
- let meta: EmitMetadata = serde_json::from_slice(&bytes).ok()?;
- if meta.source_hash != expected_source_hash {
- return None;
- }
-
- // load and verify the emit is for the meta data
- let emit_bytes = self.disk_cache.get(&emit_filename).ok()?;
- if meta.emit_hash != compute_emit_hash(&emit_bytes, self.cli_version) {
- return None;
- }
-
- // everything looks good, return it
- Some(emit_bytes)
- }
-
- /// Gets the filepath which stores the emit.
- pub fn get_emit_filepath(
- &self,
- specifier: &ModuleSpecifier,
- ) -> Option<PathBuf> {
- Some(
- self
- .disk_cache
- .location
- .join(self.get_emit_filename(specifier)?),
- )
+ let bytes = self.disk_cache.get(&emit_filename).ok()?;
+ self
+ .file_serializer
+ .deserialize(bytes, expected_source_hash)
}
/// Sets the emit code in the cache.
@@ -107,32 +73,26 @@ impl EmitCache {
return Ok(());
}
- let meta_filename = self
- .get_meta_filename(specifier)
- .ok_or_else(|| anyhow!("Could not get meta filename."))?;
let emit_filename = self
.get_emit_filename(specifier)
.ok_or_else(|| anyhow!("Could not get emit filename."))?;
-
- // save the metadata
- let metadata = EmitMetadata {
- source_hash,
- emit_hash: compute_emit_hash(code, self.cli_version),
- };
- self
- .disk_cache
- .set(&meta_filename, &serde_json::to_vec(&metadata)?)?;
-
- // save the emit source
- self.disk_cache.set(&emit_filename, code)?;
+ let cache_data = self.file_serializer.serialize(code, source_hash);
+ self.disk_cache.set(&emit_filename, &cache_data)?;
Ok(())
}
- fn get_meta_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> {
- self
- .disk_cache
- .get_cache_filename_with_extension(specifier, "meta")
+ /// Gets the filepath which stores the emit.
+ pub fn get_emit_filepath(
+ &self,
+ specifier: &ModuleSpecifier,
+ ) -> Option<PathBuf> {
+ Some(
+ self
+ .disk_cache
+ .location
+ .join(self.get_emit_filename(specifier)?),
+ )
}
fn get_emit_filename(&self, specifier: &ModuleSpecifier) -> Option<PathBuf> {
@@ -142,15 +102,68 @@ impl EmitCache {
}
}
-fn compute_emit_hash(bytes: &[u8], cli_version: &str) -> u64 {
- // it's ok to use an insecure hash here because
- // if someone can change the emit source then they
- // can also change the version hash
- FastInsecureHasher::new_without_deno_version() // use cli_version param instead
- .write(bytes)
- // emit should not be re-used between cli versions
- .write_str(cli_version)
- .finish()
+const LAST_LINE_PREFIX: &str = "\n// denoCacheMetadata=";
+
+struct EmitFileSerializer {
+ cli_version: &'static str,
+}
+
+impl EmitFileSerializer {
+ pub fn deserialize(
+ &self,
+ mut bytes: Vec<u8>,
+ expected_source_hash: u64,
+ ) -> Option<Vec<u8>> {
+ let last_newline_index = bytes.iter().rposition(|&b| b == b'\n')?;
+ let (content, last_line) = bytes.split_at(last_newline_index);
+ let hashes = last_line.strip_prefix(LAST_LINE_PREFIX.as_bytes())?;
+ let hashes = String::from_utf8_lossy(hashes);
+ let (source_hash, emit_hash) = hashes.split_once(',')?;
+
+ // verify the meta data file is for this source and CLI version
+ let source_hash = source_hash.parse::<u64>().ok()?;
+ if source_hash != expected_source_hash {
+ return None;
+ }
+ let emit_hash = emit_hash.parse::<u64>().ok()?;
+ // prevent using an emit from a different cli version or emits that were tampered with
+ if emit_hash != self.compute_emit_hash(content) {
+ return None;
+ }
+
+ // everything looks good, truncate and return it
+ bytes.truncate(content.len());
+ Some(bytes)
+ }
+
+ pub fn serialize(&self, code: &[u8], source_hash: u64) -> Vec<u8> {
+ let source_hash = source_hash.to_string();
+ let emit_hash = self.compute_emit_hash(code).to_string();
+ let capacity = code.len()
+ + LAST_LINE_PREFIX.len()
+ + source_hash.len()
+ + 1
+ + emit_hash.len();
+ let mut cache_data = Vec::with_capacity(capacity);
+ cache_data.extend(code);
+ cache_data.extend(LAST_LINE_PREFIX.as_bytes());
+ cache_data.extend(source_hash.as_bytes());
+ cache_data.push(b',');
+ cache_data.extend(emit_hash.as_bytes());
+ debug_assert_eq!(cache_data.len(), capacity);
+ cache_data
+ }
+
+ fn compute_emit_hash(&self, bytes: &[u8]) -> u64 {
+ // it's ok to use an insecure hash here because
+ // if someone can change the emit source then they
+ // can also change the version hash
+ crate::cache::FastInsecureHasher::new_without_deno_version() // use cli_version property instead
+ .write(bytes)
+ // emit should not be re-used between cli versions
+ .write_str(self.cli_version)
+ .finish()
+ }
}
#[cfg(test)]
@@ -165,7 +178,9 @@ mod test {
let disk_cache = DiskCache::new(temp_dir.path().as_path());
let cache = EmitCache {
disk_cache: disk_cache.clone(),
- cli_version: "1.0.0",
+ file_serializer: EmitFileSerializer {
+ cli_version: "1.0.0",
+ },
emit_failed_flag: Default::default(),
};
let to_string =
@@ -197,7 +212,9 @@ mod test {
// try changing the cli version (should not load previous ones)
let cache = EmitCache {
disk_cache: disk_cache.clone(),
- cli_version: "2.0.0",
+ file_serializer: EmitFileSerializer {
+ cli_version: "2.0.0",
+ },
emit_failed_flag: Default::default(),
};
assert_eq!(cache.get_emit_code(&specifier1, 10), None);
@@ -206,7 +223,9 @@ mod test {
// recreating the cache should still load the data because the CLI version is the same
let cache = EmitCache {
disk_cache,
- cli_version: "2.0.0",
+ file_serializer: EmitFileSerializer {
+ cli_version: "2.0.0",
+ },
emit_failed_flag: Default::default(),
};
assert_eq!(