Diffstat (limited to 'cli/standalone/code_cache.rs')
-rw-r--r-- | cli/standalone/code_cache.rs | 514 |
1 file changed, 514 insertions, 0 deletions
diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs
new file mode 100644
index 000000000..25b490544
--- /dev/null
+++ b/cli/standalone/code_cache.rs
@@ -0,0 +1,514 @@
+// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+
+use std::collections::BTreeMap;
+use std::collections::HashMap;
+use std::io::BufReader;
+use std::io::BufWriter;
+use std::io::Read;
+use std::io::Write;
+use std::path::Path;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use deno_ast::ModuleSpecifier;
+use deno_core::anyhow::bail;
+use deno_core::error::AnyError;
+use deno_core::parking_lot::Mutex;
+use deno_core::unsync::sync::AtomicFlag;
+use deno_runtime::code_cache::CodeCache;
+use deno_runtime::code_cache::CodeCacheType;
+
+use crate::cache::FastInsecureHasher;
+use crate::util::path::get_atomic_file_path;
+use crate::worker::CliCodeCache;
+
+enum CodeCacheStrategy {
+  FirstRun(FirstRunCodeCacheStrategy),
+  SubsequentRun(SubsequentRunCodeCacheStrategy),
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DenoCompileCodeCacheEntry {
+  pub source_hash: u64,
+  pub data: Vec<u8>,
+}
+
+pub struct DenoCompileCodeCache {
+  strategy: CodeCacheStrategy,
+}
+
+impl DenoCompileCodeCache {
+  pub fn new(file_path: PathBuf, cache_key: u64) -> Self {
+    // attempt to deserialize the cache data
+    match deserialize(&file_path, cache_key) {
+      Ok(data) => {
+        log::debug!("Loaded {} code cache entries", data.len());
+        Self {
+          strategy: CodeCacheStrategy::SubsequentRun(
+            SubsequentRunCodeCacheStrategy {
+              is_finished: AtomicFlag::lowered(),
+              data: Mutex::new(data),
+            },
+          ),
+        }
+      }
+      Err(err) => {
+        log::debug!("Failed to deserialize code cache: {:#}", err);
+        Self {
+          strategy: CodeCacheStrategy::FirstRun(FirstRunCodeCacheStrategy {
+            cache_key,
+            file_path,
+            is_finished: AtomicFlag::lowered(),
+            data: Mutex::new(FirstRunCodeCacheData {
+              cache: HashMap::new(),
+              add_count: 0,
+            }),
+          }),
+        }
+      }
+    }
+  }
+}
+
+impl CodeCache for DenoCompileCodeCache {
+  fn get_sync(
+    &self,
+    specifier: &ModuleSpecifier,
+    code_cache_type: CodeCacheType,
+    source_hash: u64,
+  ) -> Option<Vec<u8>> {
+    match &self.strategy {
+      CodeCacheStrategy::FirstRun(strategy) => {
+        if !strategy.is_finished.is_raised() {
+          // we keep track of how many times the cache is requested
+          // then serialize the cache when we get that number of
+          // "set" calls
+          strategy.data.lock().add_count += 1;
+        }
+        None
+      }
+      CodeCacheStrategy::SubsequentRun(strategy) => {
+        if strategy.is_finished.is_raised() {
+          return None;
+        }
+        strategy.take_from_cache(specifier, code_cache_type, source_hash)
+      }
+    }
+  }
+
+  fn set_sync(
+    &self,
+    specifier: ModuleSpecifier,
+    code_cache_type: CodeCacheType,
+    source_hash: u64,
+    bytes: &[u8],
+  ) {
+    match &self.strategy {
+      CodeCacheStrategy::FirstRun(strategy) => {
+        if strategy.is_finished.is_raised() {
+          return;
+        }
+
+        let data_to_serialize = {
+          let mut data = strategy.data.lock();
+          data.cache.insert(
+            (specifier.to_string(), code_cache_type),
+            DenoCompileCodeCacheEntry {
+              source_hash,
+              data: bytes.to_vec(),
+            },
+          );
+          if data.add_count != 0 {
+            data.add_count -= 1;
+          }
+          if data.add_count == 0 {
+            // don't allow using the cache anymore
+            strategy.is_finished.raise();
+            if data.cache.is_empty() {
+              None
+            } else {
+              Some(std::mem::take(&mut data.cache))
+            }
+          } else {
+            None
+          }
+        };
+        if let Some(cache_data) = &data_to_serialize {
+          strategy.write_cache_data(cache_data);
+        }
+      }
+      CodeCacheStrategy::SubsequentRun(_) => {
+        // do nothing
+      }
+    }
+  }
+}
+
+impl CliCodeCache for DenoCompileCodeCache {
+  fn enabled(&self) -> bool {
+    match &self.strategy {
+      CodeCacheStrategy::FirstRun(strategy) => {
+        !strategy.is_finished.is_raised()
+      }
+      CodeCacheStrategy::SubsequentRun(strategy) => {
+        !strategy.is_finished.is_raised()
+      }
+    }
+  }
+
+  fn as_code_cache(self: Arc<Self>) -> Arc<dyn CodeCache> {
+    self
+  }
+}
+
+type CodeCacheKey = (String, CodeCacheType);
+
+struct FirstRunCodeCacheData {
+  cache: HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
+  add_count: usize,
+}
+
+struct FirstRunCodeCacheStrategy {
+  cache_key: u64,
+  file_path: PathBuf,
+  is_finished: AtomicFlag,
+  data: Mutex<FirstRunCodeCacheData>,
+}
+
+impl FirstRunCodeCacheStrategy {
+  fn write_cache_data(
+    &self,
+    cache_data: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
+  ) {
+    let count = cache_data.len();
+    let temp_file = get_atomic_file_path(&self.file_path);
+    match serialize(&temp_file, self.cache_key, cache_data) {
+      Ok(()) => {
+        if let Err(err) = std::fs::rename(&temp_file, &self.file_path) {
+          log::debug!("Failed to rename code cache: {}", err);
+        } else {
+          log::debug!("Serialized {} code cache entries", count);
+        }
+      }
+      Err(err) => {
+        let _ = std::fs::remove_file(&temp_file);
+        log::debug!("Failed to serialize code cache: {}", err);
+      }
+    }
+  }
+}
+
+struct SubsequentRunCodeCacheStrategy {
+  is_finished: AtomicFlag,
+  data: Mutex<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>>,
+}
+
+impl SubsequentRunCodeCacheStrategy {
+  fn take_from_cache(
+    &self,
+    specifier: &ModuleSpecifier,
+    code_cache_type: CodeCacheType,
+    source_hash: u64,
+  ) -> Option<Vec<u8>> {
+    let mut data = self.data.lock();
+    // todo(dsherret): how to avoid the clone here?
+    let entry = data.remove(&(specifier.to_string(), code_cache_type))?;
+    if entry.source_hash != source_hash {
+      return None;
+    }
+    if data.is_empty() {
+      self.is_finished.raise();
+    }
+    Some(entry.data)
+  }
+}
+
+/// File format:
+/// - <header>
+///   - <cache key>
+///   - <u32: number of entries>
+/// - <[entry length]> - u64 * number of entries
+/// - <[entry]>
+///   - <[u8]: entry data>
+///   - <u8: code cache type>
+///   - <String: specifier>
+///   - <u32: specifier length>
+///   - <u64: source hash>
+///   - <u64: entry data hash>
+fn serialize(
+  file_path: &Path,
+  cache_key: u64,
+  cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
+) -> Result<(), AnyError> {
+  let cache_file = std::fs::OpenOptions::new()
+    .create(true)
+    .truncate(true)
+    .write(true)
+    .open(file_path)?;
+  let mut writer = BufWriter::new(cache_file);
+  serialize_with_writer(&mut writer, cache_key, cache)
+}
+
+fn serialize_with_writer<T: Write>(
+  writer: &mut BufWriter<T>,
+  cache_key: u64,
+  cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>,
+) -> Result<(), AnyError> {
+  // header
+  writer.write_all(&cache_key.to_le_bytes())?;
+  writer.write_all(&(cache.len() as u32).to_le_bytes())?;
+  // lengths of each entry
+  for ((specifier, _), entry) in cache {
+    let len: u64 =
+      entry.data.len() as u64 + specifier.len() as u64 + 1 + 4 + 8 + 8;
+    writer.write_all(&len.to_le_bytes())?;
+  }
+  // entries
+  for ((specifier, code_cache_type), entry) in cache {
+    writer.write_all(&entry.data)?;
+    writer.write_all(&[match code_cache_type {
+      CodeCacheType::EsModule => 0,
+      CodeCacheType::Script => 1,
+    }])?;
+    writer.write_all(specifier.as_bytes())?;
+    writer.write_all(&(specifier.len() as u32).to_le_bytes())?;
+    writer.write_all(&entry.source_hash.to_le_bytes())?;
+    let hash: u64 = FastInsecureHasher::new_without_deno_version()
+      .write(&entry.data)
+      .finish();
+    writer.write_all(&hash.to_le_bytes())?;
+  }
+
+  writer.flush()?;
+
+  Ok(())
+}
+
+fn deserialize(
+  file_path: &Path,
+  expected_cache_key: u64,
+) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> {
+  let cache_file = std::fs::File::open(file_path)?;
+  let mut reader = BufReader::new(cache_file);
+  deserialize_with_reader(&mut reader, expected_cache_key)
+}
+
+fn deserialize_with_reader<T: Read>(
+  reader: &mut BufReader<T>,
+  expected_cache_key: u64,
+) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> {
+  // it's very important to use this below so that a corrupt cache file
+  // doesn't cause a memory allocation error
+  fn new_vec_sized<T: Clone>(
+    capacity: usize,
+    default_value: T,
+  ) -> Result<Vec<T>, AnyError> {
+    let mut vec = Vec::new();
+    vec.try_reserve(capacity)?;
+    vec.resize(capacity, default_value);
+    Ok(vec)
+  }
+
+  fn try_subtract(a: usize, b: usize) -> Result<usize, AnyError> {
+    if a < b {
+      bail!("Integer underflow");
+    }
+    Ok(a - b)
+  }
+
+  let mut header_bytes = vec![0; 8 + 4];
+  reader.read_exact(&mut header_bytes)?;
+  let actual_cache_key = u64::from_le_bytes(header_bytes[..8].try_into()?);
+  if actual_cache_key != expected_cache_key {
+    // cache bust
+    bail!("Cache key mismatch");
+  }
+  let len = u32::from_le_bytes(header_bytes[8..].try_into()?) as usize;
+  // read the lengths for each entry found in the file
+  let entry_len_bytes_capacity = len * 8;
+  let mut entry_len_bytes = new_vec_sized(entry_len_bytes_capacity, 0)?;
+  reader.read_exact(&mut entry_len_bytes)?;
+  let mut lengths = Vec::new();
+  lengths.try_reserve(len)?;
+  for i in 0..len {
+    let pos = i * 8;
+    lengths.push(
+      u64::from_le_bytes(entry_len_bytes[pos..pos + 8].try_into()?) as usize,
+    );
+  }
+
+  let mut map = HashMap::new();
+  map.try_reserve(len)?;
+  for len in lengths {
+    let mut buffer = new_vec_sized(len, 0)?;
+    reader.read_exact(&mut buffer)?;
+    let entry_data_hash_start_pos = try_subtract(buffer.len(), 8)?;
+    let expected_entry_data_hash =
+      u64::from_le_bytes(buffer[entry_data_hash_start_pos..].try_into()?);
+    let source_hash_start_pos = try_subtract(entry_data_hash_start_pos, 8)?;
+    let source_hash = u64::from_le_bytes(
+      buffer[source_hash_start_pos..entry_data_hash_start_pos].try_into()?,
+    );
+    let specifier_end_pos = try_subtract(source_hash_start_pos, 4)?;
+    let specifier_len = u32::from_le_bytes(
+      buffer[specifier_end_pos..source_hash_start_pos].try_into()?,
+    ) as usize;
+    let specifier_start_pos = try_subtract(specifier_end_pos, specifier_len)?;
+    let specifier = String::from_utf8(
+      buffer[specifier_start_pos..specifier_end_pos].to_vec(),
+    )?;
+    let code_cache_type_pos = try_subtract(specifier_start_pos, 1)?;
+    let code_cache_type = match buffer[code_cache_type_pos] {
+      0 => CodeCacheType::EsModule,
+      1 => CodeCacheType::Script,
+      _ => bail!("Invalid code cache type"),
+    };
+    buffer.truncate(code_cache_type_pos);
+    let actual_entry_data_hash: u64 =
+      FastInsecureHasher::new_without_deno_version()
+        .write(&buffer)
+        .finish();
+    if expected_entry_data_hash != actual_entry_data_hash {
+      bail!("Hash mismatch.")
+    }
+    map.insert(
+      (specifier, code_cache_type),
+      DenoCompileCodeCacheEntry {
+        source_hash,
+        data: buffer,
+      },
+    );
+  }
+
+  Ok(map)
+}
+
+#[cfg(test)]
+mod test {
+  use test_util::TempDir;
+
+  use super::*;
+  use std::fs::File;
+
+  #[test]
+  fn serialize_deserialize() {
+    let cache_key = 123456;
+    let cache = {
+      let mut cache = HashMap::new();
+      cache.insert(
+        ("specifier1".to_string(), CodeCacheType::EsModule),
+        DenoCompileCodeCacheEntry {
+          source_hash: 1,
+          data: vec![1, 2, 3],
+        },
+      );
+      cache.insert(
+        ("specifier2".to_string(), CodeCacheType::EsModule),
+        DenoCompileCodeCacheEntry {
+          source_hash: 2,
+          data: vec![4, 5, 6],
+        },
+      );
+      cache.insert(
+        ("specifier2".to_string(), CodeCacheType::Script),
+        DenoCompileCodeCacheEntry {
+          source_hash: 2,
+          data: vec![6, 5, 1],
+        },
+      );
+      cache
+    };
+    let mut buffer = Vec::new();
+    serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache)
+      .unwrap();
+    let deserialized =
+      deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key)
+        .unwrap();
+    assert_eq!(cache, deserialized);
+  }
+
+  #[test]
+  fn serialize_deserialize_empty() {
+    let cache_key = 1234;
+    let cache = HashMap::new();
+    let mut buffer = Vec::new();
+    serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache)
+      .unwrap();
+    let deserialized =
+      deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key)
+        .unwrap();
+    assert_eq!(cache, deserialized);
+  }
+
+  #[test]
+  fn serialize_deserialize_corrupt() {
+    let buffer = "corrupttestingtestingtesting".as_bytes().to_vec();
+    let err = deserialize_with_reader(&mut BufReader::new(&buffer[..]), 1234)
+      .unwrap_err();
+    assert_eq!(err.to_string(), "Cache key mismatch");
+  }
+
+  #[test]
+  fn code_cache() {
+    let temp_dir = TempDir::new();
+    let file_path = temp_dir.path().join("cache.bin").to_path_buf();
+    let url1 = ModuleSpecifier::parse("https://deno.land/example1.js").unwrap();
+    let url2 = ModuleSpecifier::parse("https://deno.land/example2.js").unwrap();
+    // first run
+    {
+      let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234);
+      assert!(code_cache
+        .get_sync(&url1, CodeCacheType::EsModule, 0)
+        .is_none());
+      assert!(code_cache
+        .get_sync(&url2, CodeCacheType::EsModule, 1)
+        .is_none());
+      assert!(code_cache.enabled());
+      code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[1, 2, 3]);
+      assert!(code_cache.enabled());
+      assert!(!file_path.exists());
+      code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[2, 1, 3]);
+      assert!(file_path.exists()); // now the new code cache exists
+      assert!(!code_cache.enabled()); // no longer enabled
+    }
+    // second run
+    {
+      let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234);
+      assert!(code_cache.enabled());
+      let result1 = code_cache
+        .get_sync(&url1, CodeCacheType::EsModule, 0)
+        .unwrap();
+      assert!(code_cache.enabled());
+      let result2 = code_cache
+        .get_sync(&url2, CodeCacheType::EsModule, 1)
+        .unwrap();
+      assert!(!code_cache.enabled()); // no longer enabled
+      assert_eq!(result1, vec![1, 2, 3]);
+      assert_eq!(result2, vec![2, 1, 3]);
+    }
+
+    // new cache key first run
+    {
+      let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321);
+      assert!(code_cache
+        .get_sync(&url1, CodeCacheType::EsModule, 0)
+        .is_none());
+      assert!(code_cache
+        .get_sync(&url2, CodeCacheType::EsModule, 1)
+        .is_none());
+      code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[2, 2, 3]);
+      code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[3, 2, 3]);
+    }
+    // new cache key second run
+    {
+      let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321);
+      let result1 = code_cache
+        .get_sync(&url1, CodeCacheType::EsModule, 0)
+        .unwrap();
+      assert_eq!(result1, vec![2, 2, 3]);
+      assert!(code_cache
+        .get_sync(&url2, CodeCacheType::EsModule, 5) // different hash will cause none
+        .is_none());
+    }
+  }
+}
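
Not part of the diff above: the "File format" comment documents the layout written by serialize_with_writer, where each per-entry length recorded in the header is data.len() + specifier.len() + 1 + 4 + 8 + 8. The minimal standalone sketch below recomputes that length for a hypothetical entry (the specifier and data bytes are made up for illustration) to make the 21-byte fixed-size tail easy to verify.

// Minimal sketch with hypothetical values (not taken from the commit):
// recomputes the per-entry length that serialize_with_writer records in the
// header section of the cache file.
fn main() {
  let specifier = "https://deno.land/example1.js"; // hypothetical specifier
  let data: Vec<u8> = vec![1, 2, 3]; // hypothetical V8 code cache bytes
  // fixed-size tail: 1 (code cache type, u8) + 4 (specifier length, u32)
  // + 8 (source hash, u64) + 8 (entry data hash, u64) = 21 bytes
  let entry_len = data.len() as u64 + specifier.len() as u64 + 1 + 4 + 8 + 8;
  assert_eq!(entry_len, 3 + 29 + 21);
  println!("serialized entry length: {entry_len} bytes");
}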