diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2024-05-24 10:15:46 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-24 10:15:46 -0400 |
commit | b21004b1d16ad7b67c7b1cd235abf792bf9d9777 (patch) | |
tree | 2a89a04d2d87d8e30a34beb97cbccf320dbef0ac | |
parent | 92a8d09e498712aec2ba0e54a1ad85194ebd83af (diff) |
fix: use hash of in-memory bytes only for code cache (#23966)
* https://github.com/denoland/deno_core/pull/752
* https://github.com/denoland/deno_core/pull/753
Did benchmarking on this and it's slightly faster (couple ms) or equal
to in performance as main.
Closes #23904
-rw-r--r-- | Cargo.lock | 13 | ||||
-rw-r--r-- | Cargo.toml | 3 | ||||
-rw-r--r-- | cli/Cargo.toml | 2 | ||||
-rw-r--r-- | cli/cache/code_cache.rs | 31 | ||||
-rw-r--r-- | cli/cache/module_info.rs | 17 | ||||
-rw-r--r-- | cli/factory.rs | 1 | ||||
-rw-r--r-- | cli/module_loader.rs | 92 | ||||
-rw-r--r-- | runtime/Cargo.toml | 1 | ||||
-rw-r--r-- | runtime/code_cache.rs | 10 | ||||
-rw-r--r-- | runtime/fs_util.rs | 11 | ||||
-rw-r--r-- | runtime/worker.rs | 86 |
11 files changed, 93 insertions, 174 deletions
diff --git a/Cargo.lock b/Cargo.lock index 4ce435af3..0adc22f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1304,9 +1304,9 @@ dependencies = [ [[package]] name = "deno_core" -version = "0.282.0" +version = "0.283.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cde984155f9ec0986cb2b8e444ad675822968c5042ded50e32647f907eefd4b4" +checksum = "0f5043f9f636a3fe021e63e41c946499e1706d7565126065c60912fe5c77e54e" dependencies = [ "anyhow", "bincode", @@ -1754,9 +1754,9 @@ dependencies = [ [[package]] name = "deno_ops" -version = "0.158.0" +version = "0.159.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f24983ce064294d3046f4fce5c53de4e6e08488c1d53f2387917f63f43cdfda8" +checksum = "b26cc5277982de16514282447f8674f9048d4f1b2c0d55c088d0a7d3bf6db385" dependencies = [ "proc-macro-rules", "proc-macro2", @@ -1837,6 +1837,7 @@ dependencies = [ "test_server", "tokio", "tokio-metrics", + "twox-hash", "uuid", "which 4.4.2", "winapi", @@ -5754,9 +5755,9 @@ dependencies = [ [[package]] name = "serde_v8" -version = "0.191.0" +version = "0.192.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af3b1eaa93eacbbfc08f3fc18ff7418262781ebbeca3282e36a7f26d9c679b50" +checksum = "f7b616df6c4ff5643dd503cbfe119175ebebfd4132512e33de4971f1e91d5739" dependencies = [ "num-bigint", "serde", diff --git a/Cargo.toml b/Cargo.toml index 6723c54e6..16db0e15f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ repository = "https://github.com/denoland/deno" [workspace.dependencies] deno_ast = { version = "=0.38.2", features = ["transpiling"] } -deno_core = { version = "0.282.0" } +deno_core = { version = "0.283.0" } deno_bench_util = { version = "0.147.0", path = "./bench_util" } deno_lockfile = "0.19.0" @@ -174,6 +174,7 @@ tokio = { version = "1.36.0", features = ["full"] } tokio-metrics = { version = "0.3.0", features = ["rt"] } tokio-util = "0.7.4" tower-lsp = { version = "=0.20.0", features = ["proposed"] } +twox-hash = "=1.6.3" # Upgrading past 2.4.1 may cause WPT failures url = { version = "< 2.5.0", features = ["serde", "expose_internals"] } uuid = { version = "1.3.0", features = ["v4"] } diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 8c033f55a..ec02a801c 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -146,7 +146,7 @@ thiserror.workspace = true tokio.workspace = true tokio-util.workspace = true tower-lsp.workspace = true -twox-hash = "=1.6.3" +twox-hash.workspace = true typed-arena = "=2.0.1" uuid = { workspace = true, features = ["serde"] } walkdir = "=2.3.2" diff --git a/cli/cache/code_cache.rs b/cli/cache/code_cache.rs index 5e44c366e..8d96bf3a1 100644 --- a/cli/cache/code_cache.rs +++ b/cli/cache/code_cache.rs @@ -1,5 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use deno_ast::ModuleSpecifier; use deno_core::error::AnyError; use deno_runtime::code_cache; use deno_runtime::deno_webstorage::rusqlite::params; @@ -21,7 +22,6 @@ pub static CODE_CACHE_DB: CacheDBConfiguration = CacheDBConfiguration { on_failure: CacheFailure::Blackhole, }; -#[derive(Clone)] pub struct CodeCache { inner: CodeCacheInner, } @@ -52,28 +52,28 @@ impl CodeCache { pub fn get_sync( &self, - specifier: &str, + specifier: &ModuleSpecifier, code_cache_type: code_cache::CodeCacheType, - source_hash: &str, + source_hash: u64, ) -> Option<Vec<u8>> { Self::ensure_ok(self.inner.get_sync( - specifier, + specifier.as_str(), code_cache_type, - source_hash, + &source_hash.to_string(), )) } pub fn set_sync( &self, - specifier: &str, + specifier: &ModuleSpecifier, code_cache_type: code_cache::CodeCacheType, - source_hash: &str, + source_hash: u64, data: &[u8], ) { Self::ensure_ok(self.inner.set_sync( - specifier, + specifier.as_str(), code_cache_type, - source_hash, + &source_hash.to_string(), data, )); } @@ -82,25 +82,24 @@ impl CodeCache { impl code_cache::CodeCache for CodeCache { fn get_sync( &self, - specifier: &str, + specifier: &ModuleSpecifier, code_cache_type: code_cache::CodeCacheType, - source_hash: &str, + source_hash: u64, ) -> Option<Vec<u8>> { self.get_sync(specifier, code_cache_type, source_hash) } fn set_sync( &self, - specifier: &str, + specifier: ModuleSpecifier, code_cache_type: code_cache::CodeCacheType, - source_hash: &str, + source_hash: u64, data: &[u8], ) { - self.set_sync(specifier, code_cache_type, source_hash, data); + self.set_sync(&specifier, code_cache_type, source_hash, data); } } -#[derive(Clone)] struct CodeCacheInner { conn: CacheDB, } @@ -135,7 +134,7 @@ impl CodeCacheInner { &self, specifier: &str, code_cache_type: code_cache::CodeCacheType, - source_hash: &str, + source_hash: &str, // use string because sqlite doesn't have a u64 type data: &[u8], ) -> Result<(), AnyError> { let sql = " diff --git a/cli/cache/module_info.rs b/cli/cache/module_info.rs index 2e9274160..0e7a97678 100644 --- a/cli/cache/module_info.rs +++ b/cli/cache/module_info.rs @@ -87,23 +87,6 @@ impl ModuleInfoCache { } } - pub fn get_module_source_hash( - &self, - specifier: &ModuleSpecifier, - media_type: MediaType, - ) -> Result<Option<ModuleInfoCacheSourceHash>, AnyError> { - let query = "SELECT source_hash FROM moduleinfocache WHERE specifier=?1 AND media_type=?2"; - let res = self.conn.query_row( - query, - params![specifier.as_str(), serialize_media_type(media_type)], - |row| { - let source_hash: String = row.get(0)?; - Ok(ModuleInfoCacheSourceHash(source_hash)) - }, - )?; - Ok(res) - } - pub fn get_module_info( &self, specifier: &ModuleSpecifier, diff --git a/cli/factory.rs b/cli/factory.rs index 43486bca0..6cdbff9bb 100644 --- a/cli/factory.rs +++ b/cli/factory.rs @@ -798,7 +798,6 @@ impl CliFactory { }, self.emitter()?.clone(), self.main_module_graph_container().await?.clone(), - self.module_info_cache()?.clone(), self.module_load_preparer().await?.clone(), cli_node_resolver.clone(), NpmModuleLoader::new( diff --git a/cli/module_loader.rs b/cli/module_loader.rs index 0ee555f38..6d8d3e92b 100644 --- a/cli/module_loader.rs +++ b/cli/module_loader.rs @@ -13,7 +13,7 @@ use crate::args::CliOptions; use crate::args::DenoSubcommand; use crate::args::TsTypeLib; use crate::cache::CodeCache; -use crate::cache::ModuleInfoCache; +use crate::cache::FastInsecureHasher; use crate::cache::ParsedSourceCache; use crate::emit::Emitter; use crate::factory::CliFactory; @@ -55,6 +55,7 @@ use deno_core::ModuleSpecifier; use deno_core::ModuleType; use deno_core::RequestedModuleType; use deno_core::ResolutionKind; +use deno_core::SourceCodeCacheInfo; use deno_core::SourceMapGetter; use deno_graph::source::ResolutionMode; use deno_graph::source::Resolver; @@ -67,7 +68,6 @@ use deno_graph::Resolution; use deno_lockfile::Lockfile; use deno_runtime::code_cache; use deno_runtime::deno_node::NodeResolutionMode; -use deno_runtime::fs_util::code_timestamp; use deno_runtime::permissions::PermissionsContainer; use deno_semver::npm::NpmPackageReqReference; @@ -221,7 +221,6 @@ struct SharedCliModuleLoaderState { code_cache: Option<Arc<CodeCache>>, emitter: Arc<Emitter>, main_module_graph_container: Arc<MainModuleGraphContainer>, - module_info_cache: Arc<ModuleInfoCache>, module_load_preparer: Arc<ModuleLoadPreparer>, node_resolver: Arc<CliNodeResolver>, npm_module_loader: NpmModuleLoader, @@ -240,7 +239,6 @@ impl CliModuleLoaderFactory { code_cache: Option<Arc<CodeCache>>, emitter: Arc<Emitter>, main_module_graph_container: Arc<MainModuleGraphContainer>, - module_info_cache: Arc<ModuleInfoCache>, module_load_preparer: Arc<ModuleLoadPreparer>, node_resolver: Arc<CliNodeResolver>, npm_module_loader: NpmModuleLoader, @@ -261,7 +259,6 @@ impl CliModuleLoaderFactory { code_cache, emitter, main_module_graph_container, - module_info_cache, module_load_preparer, node_resolver, npm_module_loader, @@ -388,27 +385,20 @@ impl<TGraphContainer: ModuleGraphContainer> } let code_cache = if module_type == ModuleType::JavaScript { - self.shared.code_cache.as_ref().and_then(|cache| { - let code_hash = self - .get_code_hash_or_timestamp(specifier, code_source.media_type) - .ok() - .flatten(); - if let Some(code_hash) = code_hash { - cache - .get_sync( - specifier.as_str(), - code_cache::CodeCacheType::EsModule, - &code_hash, - ) - .map(Cow::from) - .inspect(|_| { - // This log line is also used by tests. - log::debug!( - "V8 code cache hit for ES module: {specifier}, [{code_hash:?}]" - ); - }) - } else { - None + self.shared.code_cache.as_ref().map(|cache| { + let code_hash = FastInsecureHasher::hash(&code); + let data = cache + .get_sync(specifier, code_cache::CodeCacheType::EsModule, code_hash) + .map(Cow::from) + .inspect(|_| { + // This log line is also used by tests. + log::debug!( + "V8 code cache hit for ES module: {specifier}, [{code_hash:?}]" + ); + }); + SourceCodeCacheInfo { + hash: code_hash, + data, } }) } else { @@ -589,25 +579,6 @@ impl<TGraphContainer: ModuleGraphContainer> resolution.map_err(|err| err.into()) } - fn get_code_hash_or_timestamp( - &self, - specifier: &ModuleSpecifier, - media_type: MediaType, - ) -> Result<Option<String>, AnyError> { - let hash = self - .shared - .module_info_cache - .get_module_source_hash(specifier, media_type)?; - if let Some(hash) = hash { - return Ok(Some(hash.into())); - } - - // Use the modified timestamp from the local file system if we don't have a hash. - let timestamp = code_timestamp(specifier.as_str()) - .map(|timestamp| timestamp.to_string())?; - Ok(Some(timestamp)) - } - async fn load_prepared_module( &self, specifier: &ModuleSpecifier, @@ -865,28 +836,21 @@ impl<TGraphContainer: ModuleGraphContainer> ModuleLoader fn code_cache_ready( &self, - specifier: &ModuleSpecifier, + specifier: ModuleSpecifier, + source_hash: u64, code_cache: &[u8], ) -> Pin<Box<dyn Future<Output = ()>>> { if let Some(cache) = self.0.shared.code_cache.as_ref() { - let media_type = MediaType::from_specifier(specifier); - let code_hash = self - .0 - .get_code_hash_or_timestamp(specifier, media_type) - .ok() - .flatten(); - if let Some(code_hash) = code_hash { - // This log line is also used by tests. - log::debug!( - "Updating V8 code cache for ES module: {specifier}, [{code_hash:?}]" - ); - cache.set_sync( - specifier.as_str(), - code_cache::CodeCacheType::EsModule, - &code_hash, - code_cache, - ); - } + // This log line is also used by tests. + log::debug!( + "Updating V8 code cache for ES module: {specifier}, [{source_hash:?}]" + ); + cache.set_sync( + &specifier, + code_cache::CodeCacheType::EsModule, + source_hash, + code_cache, + ); } std::future::ready(()).boxed_local() } diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index 80dd6a3e0..f66dfb840 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -116,6 +116,7 @@ signal-hook = "0.3.17" signal-hook-registry = "1.4.0" tokio.workspace = true tokio-metrics.workspace = true +twox-hash.workspace = true uuid.workspace = true which = "4.2.5" diff --git a/runtime/code_cache.rs b/runtime/code_cache.rs index ccc070365..2a56543a4 100644 --- a/runtime/code_cache.rs +++ b/runtime/code_cache.rs @@ -1,5 +1,7 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use deno_core::ModuleSpecifier; + pub enum CodeCacheType { EsModule, Script, @@ -17,15 +19,15 @@ impl CodeCacheType { pub trait CodeCache: Send + Sync { fn get_sync( &self, - specifier: &str, + specifier: &ModuleSpecifier, code_cache_type: CodeCacheType, - source_hash: &str, + source_hash: u64, ) -> Option<Vec<u8>>; fn set_sync( &self, - specifier: &str, + specifier: ModuleSpecifier, code_cache_type: CodeCacheType, - source_hash: &str, + source_hash: u64, data: &[u8], ); } diff --git a/runtime/fs_util.rs b/runtime/fs_util.rs index 09b107300..fe9736038 100644 --- a/runtime/fs_util.rs +++ b/runtime/fs_util.rs @@ -63,17 +63,6 @@ pub fn specifier_to_file_path( } } -pub fn code_timestamp(specifier: &str) -> Result<u64, AnyError> { - let specifier = ModuleSpecifier::parse(specifier)?; - let path = specifier_to_file_path(&specifier)?; - #[allow(clippy::disallowed_methods)] - let timestamp = std::fs::metadata(path)? - .modified()? - .duration_since(std::time::UNIX_EPOCH)? - .as_millis() as u64; - Ok(timestamp) -} - #[cfg(test)] mod tests { use super::*; diff --git a/runtime/worker.rs b/runtime/worker.rs index 1c291c641..09faa6e08 100644 --- a/runtime/worker.rs +++ b/runtime/worker.rs @@ -32,6 +32,7 @@ use deno_core::OpMetricsSummaryTracker; use deno_core::PollEventLoopOptions; use deno_core::RuntimeOptions; use deno_core::SharedArrayBufferStore; +use deno_core::SourceCodeCacheInfo; use deno_core::SourceMapGetter; use deno_cron::local::LocalCronHandler; use deno_fs::FileSystem; @@ -45,7 +46,6 @@ use log::debug; use crate::code_cache::CodeCache; use crate::code_cache::CodeCacheType; -use crate::fs_util::code_timestamp; use crate::inspector_server::InspectorServer; use crate::ops; use crate::permissions::PermissionsContainer; @@ -306,51 +306,6 @@ pub fn create_op_metrics( (op_summary_metrics, op_metrics_factory_fn) } -fn get_code_cache( - code_cache: Arc<dyn CodeCache>, - specifier: &str, -) -> Option<Vec<u8>> { - // Code hashes are not maintained for op_eval_context scripts. Instead we use - // the modified timestamp from the local file system. - if let Ok(code_timestamp) = code_timestamp(specifier) { - code_cache - .get_sync( - specifier, - CodeCacheType::Script, - code_timestamp.to_string().as_str(), - ) - .inspect(|_| { - // This log line is also used by tests. - log::debug!( - "V8 code cache hit for script: {specifier}, [{code_timestamp}]" - ); - }) - } else { - None - } -} - -fn set_code_cache( - code_cache: Arc<dyn CodeCache>, - specifier: &str, - data: &[u8], -) { - // Code hashes are not maintained for op_eval_context scripts. Instead we use - // the modified timestamp from the local file system. - if let Ok(code_timestamp) = code_timestamp(specifier) { - // This log line is also used by tests. - log::debug!( - "Updating V8 code cache for script: {specifier}, [{code_timestamp}]", - ); - code_cache.set_sync( - specifier, - CodeCacheType::Script, - code_timestamp.to_string().as_str(), - data, - ); - } -} - impl MainWorker { pub fn bootstrap_from_options( main_module: ModuleSpecifier, @@ -550,16 +505,41 @@ impl MainWorker { validate_import_attributes_cb: Some(Box::new( validate_import_attributes_callback, )), - enable_code_cache: options.v8_code_cache.is_some(), eval_context_code_cache_cbs: options.v8_code_cache.map(|cache| { let cache_clone = cache.clone(); ( - Box::new(move |specifier: &str| { - Ok(get_code_cache(cache.clone(), specifier).map(Cow::Owned)) - }) as Box<dyn Fn(&_) -> _>, - Box::new(move |specifier: &str, data: &[u8]| { - set_code_cache(cache_clone.clone(), specifier, data); - }) as Box<dyn Fn(&_, &_)>, + Box::new(move |specifier: &ModuleSpecifier, code: &v8::String| { + let source_hash = { + use std::hash::Hash; + use std::hash::Hasher; + let mut hasher = twox_hash::XxHash64::default(); + code.hash(&mut hasher); + hasher.finish() + }; + let data = cache + .get_sync(specifier, CodeCacheType::Script, source_hash) + .inspect(|_| { + // This log line is also used by tests. + log::debug!("V8 code cache hit for script: {specifier}, [{source_hash}]"); + }) + .map(Cow::Owned); + Ok(SourceCodeCacheInfo { + data, + hash: source_hash, + }) + }) as Box<dyn Fn(&_, &_) -> _>, + Box::new( + move |specifier: ModuleSpecifier, source_hash: u64, data: &[u8]| { + // This log line is also used by tests. + log::debug!("Updating V8 code cache for script: {specifier}, [{source_hash}]"); + cache_clone.set_sync( + specifier, + CodeCacheType::Script, + source_hash, + data, + ); + }, + ) as Box<dyn Fn(_, _, &_)>, ) }), ..Default::default() |