diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2024-08-26 19:59:17 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-26 23:59:17 +0000 |
commit | c89a20b42899abff5c3ea84660c8110806c5fbee (patch) | |
tree | e9d93ce49b391faf0058bd3223ba72d398f78fc8 | |
parent | e13230226fe91498b3a5f28a8de6edbe4f164944 (diff) |
perf(cache): single cache file for remote modules (#24983)
This changes the global cache to store the cache file for remote modules
in one file instead of two.
-rw-r--r-- | Cargo.lock | 4 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | cli/cache/mod.rs | 8 | ||||
-rw-r--r-- | cli/factory.rs | 7 | ||||
-rw-r--r-- | cli/file_fetcher.rs | 111 | ||||
-rw-r--r-- | cli/lsp/cache.rs | 10 | ||||
-rw-r--r-- | cli/lsp/documents.rs | 12 | ||||
-rw-r--r-- | cli/lsp/jsr.rs | 7 | ||||
-rw-r--r-- | tests/integration/jsr_tests.rs | 25 | ||||
-rw-r--r-- | tests/integration/run_tests.rs | 33 |
10 files changed, 93 insertions, 126 deletions
diff --git a/Cargo.lock b/Cargo.lock index 4226e7eb0..d97b04d4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1346,9 +1346,9 @@ dependencies = [ [[package]] name = "deno_cache_dir" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fa4989e4c6b0409438e2a68a91e4e02858b0d8ba6e5bc6860af6b0d0f385e8" +checksum = "405790fa1fa5f05c2e7dca817f820dc1c950ea846f47212ed6d670a3023cb4fe" dependencies = [ "deno_media_type", "indexmap", diff --git a/Cargo.toml b/Cargo.toml index 3176d4e27..101663960 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -102,7 +102,7 @@ chrono = { version = "0.4", default-features = false, features = ["std", "serde" console_static_text = "=0.8.1" data-encoding = "2.3.3" data-url = "=0.3.0" -deno_cache_dir = "=0.10.2" +deno_cache_dir = "=0.11.0" deno_package_json = { version = "=0.1.1", default-features = false } dlopen2 = "0.6.1" ecb = "=0.1.2" diff --git a/cli/cache/mod.rs b/cli/cache/mod.rs index 772d2359d..cc183530d 100644 --- a/cli/cache/mod.rs +++ b/cli/cache/mod.rs @@ -62,12 +62,8 @@ pub const CACHE_PERM: u32 = 0o644; pub struct RealDenoCacheEnv; impl deno_cache_dir::DenoCacheEnv for RealDenoCacheEnv { - fn read_file_bytes(&self, path: &Path) -> std::io::Result<Option<Vec<u8>>> { - match std::fs::read(path) { - Ok(s) => Ok(Some(s)), - Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), - Err(err) => Err(err), - } + fn read_file_bytes(&self, path: &Path) -> std::io::Result<Vec<u8>> { + std::fs::read(path) } fn atomic_write_file( diff --git a/cli/factory.rs b/cli/factory.rs index 25f6afa74..224984643 100644 --- a/cli/factory.rs +++ b/cli/factory.rs @@ -304,8 +304,11 @@ impl CliFactory { let global_cache = self.global_http_cache()?.clone(); match self.cli_options()?.vendor_dir_path() { Some(local_path) => { - let local_cache = - LocalHttpCache::new(local_path.clone(), global_cache); + let local_cache = LocalHttpCache::new( + local_path.clone(), + global_cache, + deno_cache_dir::GlobalToLocalCopy::Allow, + ); Ok(Arc::new(local_cache)) } None => Ok(global_cache), diff --git a/cli/file_fetcher.rs b/cli/file_fetcher.rs index 19acf2e2b..ace4d3c7e 100644 --- a/cli/file_fetcher.rs +++ b/cli/file_fetcher.rs @@ -11,7 +11,6 @@ use crate::http_util::HttpClientProvider; use crate::util::progress_bar::ProgressBar; use deno_ast::MediaType; -use deno_core::anyhow::bail; use deno_core::anyhow::Context; use deno_core::error::custom_error; use deno_core::error::generic_error; @@ -52,6 +51,25 @@ pub enum FileOrRedirect { Redirect(ModuleSpecifier), } +impl FileOrRedirect { + fn from_deno_cache_entry( + specifier: &ModuleSpecifier, + cache_entry: deno_cache_dir::CacheEntry, + ) -> Result<Self, AnyError> { + if let Some(redirect_to) = cache_entry.metadata.headers.get("location") { + let redirect = + deno_core::resolve_import(redirect_to, specifier.as_str())?; + Ok(FileOrRedirect::Redirect(redirect)) + } else { + Ok(FileOrRedirect::File(File { + specifier: specifier.clone(), + maybe_headers: Some(cache_entry.metadata.headers), + source: Arc::from(cache_entry.content), + })) + } + } +} + /// A structure representing a source file. #[derive(Debug, Clone, Eq, PartialEq)] pub struct File { @@ -238,45 +256,32 @@ impl FileFetcher { ); let cache_key = self.http_cache.cache_item_key(specifier)?; // compute this once - let Some(headers) = self.http_cache.read_headers(&cache_key)? else { - return Ok(None); - }; - if let Some(redirect_to) = headers.get("location") { - let redirect = - deno_core::resolve_import(redirect_to, specifier.as_str())?; - return Ok(Some(FileOrRedirect::Redirect(redirect))); - } - let result = self.http_cache.read_file_bytes( + let result = self.http_cache.get( &cache_key, maybe_checksum .as_ref() .map(|c| deno_cache_dir::Checksum::new(c.as_str())), - deno_cache_dir::GlobalToLocalCopy::Allow, ); - let bytes = match result { - Ok(Some(bytes)) => bytes, - Ok(None) => return Ok(None), + match result { + Ok(Some(cache_data)) => Ok(Some(FileOrRedirect::from_deno_cache_entry( + specifier, cache_data, + )?)), + Ok(None) => Ok(None), Err(err) => match err { - deno_cache_dir::CacheReadFileError::Io(err) => return Err(err.into()), + deno_cache_dir::CacheReadFileError::Io(err) => Err(err.into()), deno_cache_dir::CacheReadFileError::ChecksumIntegrity(err) => { // convert to the equivalent deno_graph error so that it // enhances it if this is passed to deno_graph - return Err( + Err( deno_graph::source::ChecksumIntegrityError { actual: err.actual, expected: err.expected, } .into(), - ); + ) } }, - }; - - Ok(Some(FileOrRedirect::File(File { - specifier: specifier.clone(), - maybe_headers: Some(headers), - source: Arc::from(bytes), - }))) + } } /// Convert a data URL into a file, resulting in an error if the URL is @@ -363,12 +368,30 @@ impl FileFetcher { ); } - let maybe_etag = self + let maybe_etag_cache_entry = self .http_cache .cache_item_key(specifier) .ok() - .and_then(|key| self.http_cache.read_headers(&key).ok().flatten()) - .and_then(|headers| headers.get("etag").cloned()); + .and_then(|key| { + self + .http_cache + .get( + &key, + maybe_checksum + .as_ref() + .map(|c| deno_cache_dir::Checksum::new(c.as_str())), + ) + .ok() + .flatten() + }) + .and_then(|cache_entry| { + cache_entry + .metadata + .headers + .get("etag") + .cloned() + .map(|etag| (cache_entry, etag)) + }); let maybe_auth_token = self.auth_tokens.get(specifier); async fn handle_request_or_server_error( @@ -390,7 +413,6 @@ impl FileFetcher { } } - let mut maybe_etag = maybe_etag; let mut retried = false; // retry intermittent failures let result = loop { let result = match self @@ -399,31 +421,17 @@ impl FileFetcher { .fetch_no_follow(FetchOnceArgs { url: specifier.clone(), maybe_accept: maybe_accept.map(ToOwned::to_owned), - maybe_etag: maybe_etag.clone(), + maybe_etag: maybe_etag_cache_entry + .as_ref() + .map(|(_, etag)| etag.clone()), maybe_auth_token: maybe_auth_token.clone(), maybe_progress_guard: maybe_progress_guard.as_ref(), }) .await? { FetchOnceResult::NotModified => { - let file_or_redirect = - self.fetch_cached_no_follow(specifier, maybe_checksum)?; - match file_or_redirect { - Some(file_or_redirect) => Ok(file_or_redirect), - None => { - // Someone may have deleted the body from the cache since - // it's currently stored in a separate file from the headers, - // so delete the etag and try again - if maybe_etag.is_some() { - debug!("Cache body not found. Trying again without etag."); - maybe_etag = None; - continue; - } else { - // should never happen - bail!("Your deno cache directory is in an unrecoverable state. Please delete it and try again.") - } - } - } + let (cache_entry, _) = maybe_etag_cache_entry.unwrap(); + FileOrRedirect::from_deno_cache_entry(specifier, cache_entry) } FetchOnceResult::Redirect(redirect_url, headers) => { self.http_cache.set(specifier, headers, &[])?; @@ -1480,13 +1488,10 @@ mod tests { let cache_key = file_fetcher.http_cache.cache_item_key(url).unwrap(); let bytes = file_fetcher .http_cache - .read_file_bytes( - &cache_key, - None, - deno_cache_dir::GlobalToLocalCopy::Allow, - ) + .get(&cache_key, None) .unwrap() - .unwrap(); + .unwrap() + .content; String::from_utf8(bytes).unwrap() } diff --git a/cli/lsp/cache.rs b/cli/lsp/cache.rs index 5dae38c20..d3c05ca91 100644 --- a/cli/lsp/cache.rs +++ b/cli/lsp/cache.rs @@ -17,16 +17,6 @@ use std::path::Path; use std::sync::Arc; use std::time::SystemTime; -/// In the LSP, we disallow the cache from automatically copying from -/// the global cache to the local cache for technical reasons. -/// -/// 1. We need to verify the checksums from the lockfile are correct when -/// moving from the global to the local cache. -/// 2. We need to verify the checksums for JSR https specifiers match what -/// is found in the package's manifest. -pub const LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY: deno_cache_dir::GlobalToLocalCopy = - deno_cache_dir::GlobalToLocalCopy::Disallow; - pub fn calculate_fs_version( cache: &LspCache, specifier: &ModuleSpecifier, diff --git a/cli/lsp/documents.rs b/cli/lsp/documents.rs index a8ddc8fd7..80c5a4dc5 100644 --- a/cli/lsp/documents.rs +++ b/cli/lsp/documents.rs @@ -2,7 +2,6 @@ use super::cache::calculate_fs_version; use super::cache::LspCache; -use super::cache::LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY; use super::config::Config; use super::resolver::LspResolver; use super::testing::TestCollector; @@ -872,22 +871,19 @@ impl FileSystemDocuments { } else { let http_cache = cache.for_specifier(file_referrer); let cache_key = http_cache.cache_item_key(specifier).ok()?; - let bytes = http_cache - .read_file_bytes(&cache_key, None, LSP_DISALLOW_GLOBAL_TO_LOCAL_COPY) - .ok()??; - let specifier_headers = http_cache.read_headers(&cache_key).ok()??; + let cached_file = http_cache.get(&cache_key, None).ok()??; let (_, maybe_charset) = deno_graph::source::resolve_media_type_and_charset_from_headers( specifier, - Some(&specifier_headers), + Some(&cached_file.metadata.headers), ); let content = deno_graph::source::decode_owned_source( specifier, - bytes, + cached_file.content, maybe_charset, ) .ok()?; - let maybe_headers = Some(specifier_headers); + let maybe_headers = Some(cached_file.metadata.headers); Document::new( specifier.clone(), content.into(), diff --git a/cli/lsp/jsr.rs b/cli/lsp/jsr.rs index 9ffcdf9e6..c0a82383d 100644 --- a/cli/lsp/jsr.rs +++ b/cli/lsp/jsr.rs @@ -258,12 +258,9 @@ fn read_cached_url( cache: &Arc<dyn HttpCache>, ) -> Option<Vec<u8>> { cache - .read_file_bytes( - &cache.cache_item_key(url).ok()?, - None, - deno_cache_dir::GlobalToLocalCopy::Disallow, - ) + .get(&cache.cache_item_key(url).ok()?, None) .ok()? + .map(|f| f.content) } #[derive(Debug)] diff --git a/tests/integration/jsr_tests.rs b/tests/integration/jsr_tests.rs index e72eab1fa..f31b53f27 100644 --- a/tests/integration/jsr_tests.rs +++ b/tests/integration/jsr_tests.rs @@ -1,5 +1,7 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use deno_cache_dir::HttpCache; +use deno_core::serde_json; use deno_core::serde_json::json; use deno_core::serde_json::Value; use deno_lockfile::Lockfile; @@ -184,13 +186,24 @@ fn reload_info_not_found_cache_but_exists_remote() { let specifier = Url::parse(&format!("http://127.0.0.1:4250/{}/meta.json", package)) .unwrap(); - let registry_json_path = deno_dir - .path() - .join("deps") - .join(deno_cache_dir::url_to_filename(&specifier).unwrap()); - let mut registry_json = registry_json_path.read_json_value(); + let cache = deno_cache_dir::GlobalHttpCache::new( + deno_dir.path().join("deps").to_path_buf(), + deno_cache_dir::TestRealDenoCacheEnv, + ); + let entry = cache + .get(&cache.cache_item_key(&specifier).unwrap(), None) + .unwrap() + .unwrap(); + let mut registry_json: serde_json::Value = + serde_json::from_slice(&entry.content).unwrap(); remove_version(&mut registry_json, version); - registry_json_path.write_json(®istry_json); + cache + .set( + &specifier, + entry.metadata.headers.clone(), + registry_json.to_string().as_bytes(), + ) + .unwrap(); } // This tests that when a local machine doesn't have a version diff --git a/tests/integration/run_tests.rs b/tests/integration/run_tests.rs index deec3d1d4..ad20b77e1 100644 --- a/tests/integration/run_tests.rs +++ b/tests/integration/run_tests.rs @@ -4973,39 +4973,6 @@ console.log(add(3, 4)); } #[test] -fn run_etag_delete_source_cache() { - let test_context = TestContextBuilder::new() - .use_temp_cwd() - .use_http_server() - .build(); - test_context - .temp_dir() - .write("main.ts", "import 'http://localhost:4545/etag_script.ts'"); - test_context - .new_command() - .args("cache main.ts") - .run() - .skip_output_check(); - - // The cache is currently stored unideally in two files where one file has the headers - // and the other contains the body. An issue can happen with the etag header where the - // headers file exists, but the body was deleted. We need to get the cache to gracefully - // handle this scenario. - let deno_dir = test_context.deno_dir().path(); - let etag_script_path = deno_dir.join("deps/http/localhost_PORT4545/26110db7d42c9bad32386735cbc05c301f83e4393963deb8da14fec3b4202a13"); - assert!(etag_script_path.exists()); - etag_script_path.remove_file(); - - test_context - .new_command() - .args("cache --reload --log-level=debug main.ts") - .run() - .assert_matches_text( - "[WILDCARD]Cache body not found. Trying again without etag.[WILDCARD]", - ); -} - -#[test] fn code_cache_test() { let test_context = TestContextBuilder::new().use_temp_cwd().build(); let deno_dir = test_context.deno_dir(); |