diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2023-07-10 13:42:47 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-10 13:42:47 -0400 |
commit | 629d09b149ab42cc4c3cebc41e0f23112ace891c (patch) | |
tree | 5bcde27002aeb3f86ac99a3ad79d1e59a1901260 | |
parent | fe91cd5b84f3dfb3c1d375cd2eb98792b869b675 (diff) |
perf: add setup cache for node_modules folder (#19787)
Part of #19774. This makes it twice as fast on my machine.
Stores a file at `node_modules/.deno/setup-cache.bin`, which contains
information about how the node_modules folder is currently setup.
Obviously there is a risk that this information will get out of date
with the current folder structure.
-rw-r--r-- | Cargo.lock | 10 | ||||
-rw-r--r-- | cli/Cargo.toml | 1 | ||||
-rw-r--r-- | cli/npm/resolvers/local.rs | 257 |
3 files changed, 229 insertions, 39 deletions
diff --git a/Cargo.lock b/Cargo.lock index d05bf423a..0cf8d56b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -373,6 +373,15 @@ dependencies = [ ] [[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] name = "bit-set" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -779,6 +788,7 @@ dependencies = [ "atty", "base32", "base64 0.13.1", + "bincode", "cache_control", "chrono", "clap", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 30480016a..40db9d243 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -58,6 +58,7 @@ async-trait.workspace = true atty.workspace = true base32 = "=0.4.0" base64.workspace = true +bincode = "=1.3.3" cache_control.workspace = true chrono = { version = "=0.4.22", default-features = false, features = ["std"] } clap = { version = "=4.3.3", features = ["string"] } diff --git a/cli/npm/resolvers/local.rs b/cli/npm/resolvers/local.rs index 670f41480..e94a91086 100644 --- a/cli/npm/resolvers/local.rs +++ b/cli/npm/resolvers/local.rs @@ -11,7 +11,9 @@ use std::path::Path; use std::path::PathBuf; use std::sync::Arc; +use crate::cache::CACHE_PERM; use crate::npm::cache::mixed_case_package_name_decode; +use crate::util::fs::atomic_write_file; use crate::util::fs::symlink_dir; use crate::util::fs::LaxSingleProcessFsFlag; use crate::util::progress_bar::ProgressBar; @@ -35,6 +37,8 @@ use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; use deno_runtime::deno_node::PackageJson; use deno_semver::npm::NpmPackageNv; +use serde::Deserialize; +use serde::Serialize; use crate::npm::cache::mixed_case_package_name_encode; use crate::npm::cache::should_sync_download; @@ -278,6 +282,10 @@ async fn sync_resolution_with_fs( ) .await; + // load this after we get the directory lock + let mut setup_cache = + SetupCache::load(deno_local_registry_dir.join(".setup-cache.bin")); + let pb_clear_guard = progress_bar.clear_guard(); // prevent flickering // 1. Write all the packages out the .deno directory. @@ -307,15 +315,19 @@ async fn sync_resolution_with_fs( newest_packages_by_name.insert(&package.id.nv.name, package); }; - let folder_name = + let package_folder_name = get_package_folder_id_folder_name(&package.get_package_cache_folder_id()); - let folder_path = deno_local_registry_dir.join(&folder_name); + let folder_path = deno_local_registry_dir.join(&package_folder_name); let initialized_file = folder_path.join(".initialized"); if !cache .cache_setting() .should_use_for_npm_package(&package.id.nv.name) || !initialized_file.exists() { + // cache bust the dep from the dep setup cache so the symlinks + // are forced to be recreated + setup_cache.remove_dep(&package_folder_name); + let pb = progress_bar.clone(); let cache = cache.clone(); let registry_url = registry_url.clone(); @@ -388,11 +400,12 @@ async fn sync_resolution_with_fs( // Symlink node_modules/.deno/<package_id>/node_modules/<dep_name> to // node_modules/.deno/<dep_id>/node_modules/<dep_package_name> for package in package_partitions.iter_all() { + let package_folder_name = + get_package_folder_id_folder_name(&package.get_package_cache_folder_id()); let sub_node_modules = deno_local_registry_dir - .join(get_package_folder_id_folder_name( - &package.get_package_cache_folder_id(), - )) + .join(&package_folder_name) .join("node_modules"); + let mut dep_setup_cache = setup_cache.with_dep(&package_folder_name); for (name, dep_id) in &package.dependencies { let dep_cache_folder_id = snapshot .package_from_id(dep_id) @@ -400,16 +413,18 @@ async fn sync_resolution_with_fs( .get_package_cache_folder_id(); let dep_folder_name = get_package_folder_id_folder_name(&dep_cache_folder_id); - let dep_folder_path = join_package_name( - &deno_local_registry_dir - .join(dep_folder_name) - .join("node_modules"), - &dep_id.nv.name, - ); - symlink_package_dir( - &dep_folder_path, - &join_package_name(&sub_node_modules, name), - )?; + if dep_setup_cache.insert(name, &dep_folder_name) { + let dep_folder_path = join_package_name( + &deno_local_registry_dir + .join(dep_folder_name) + .join("node_modules"), + &dep_id.nv.name, + ); + symlink_package_dir( + &dep_folder_path, + &join_package_name(&sub_node_modules, name), + )?; + } } } @@ -425,19 +440,21 @@ async fn sync_resolution_with_fs( continue; // skip, already handled } let package = snapshot.package_from_id(id).unwrap(); - let local_registry_package_path = join_package_name( - &deno_local_registry_dir - .join(get_package_folder_id_folder_name( - &package.get_package_cache_folder_id(), - )) - .join("node_modules"), - &id.nv.name, - ); + let target_folder_name = + get_package_folder_id_folder_name(&package.get_package_cache_folder_id()); + if setup_cache.insert_root_symlink(&id.nv.name, &target_folder_name) { + let local_registry_package_path = join_package_name( + &deno_local_registry_dir + .join(target_folder_name) + .join("node_modules"), + &id.nv.name, + ); - symlink_package_dir( - &local_registry_package_path, - &join_package_name(root_node_modules_dir_path, &id.nv.name), - )?; + symlink_package_dir( + &local_registry_package_path, + &join_package_name(root_node_modules_dir_path, &id.nv.name), + )?; + } } // 5. Create a node_modules/.deno/node_modules/<package-name> directory with @@ -447,27 +464,159 @@ async fn sync_resolution_with_fs( continue; // skip, already handled } - let local_registry_package_path = join_package_name( - &deno_local_registry_dir - .join(get_package_folder_id_folder_name( - &package.get_package_cache_folder_id(), - )) - .join("node_modules"), - &package.id.nv.name, - ); + let target_folder_name = + get_package_folder_id_folder_name(&package.get_package_cache_folder_id()); + if setup_cache.insert_deno_symlink(&package.id.nv.name, &target_folder_name) + { + let local_registry_package_path = join_package_name( + &deno_local_registry_dir + .join(target_folder_name) + .join("node_modules"), + &package.id.nv.name, + ); - symlink_package_dir( - &local_registry_package_path, - &join_package_name(&deno_node_modules_dir, &package.id.nv.name), - )?; + symlink_package_dir( + &local_registry_package_path, + &join_package_name(&deno_node_modules_dir, &package.id.nv.name), + )?; + } } + setup_cache.save(); drop(single_process_lock); drop(pb_clear_guard); Ok(()) } +/// Represents a dependency at `node_modules/.deno/<package_id>/` +struct SetupCacheDep<'a> { + previous: Option<&'a HashMap<String, String>>, + current: &'a mut HashMap<String, String>, +} + +impl<'a> SetupCacheDep<'a> { + pub fn insert(&mut self, name: &str, target_folder_name: &str) -> bool { + self + .current + .insert(name.to_string(), target_folder_name.to_string()); + if let Some(previous_target) = self.previous.and_then(|p| p.get(name)) { + previous_target != target_folder_name + } else { + true + } + } +} + +#[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq)] +struct SetupCacheData { + root_symlinks: HashMap<String, String>, + deno_symlinks: HashMap<String, String>, + dep_symlinks: HashMap<String, HashMap<String, String>>, +} + +/// It is very slow to try to re-setup the symlinks each time, so this will +/// cache what we've setup on the last run and only update what is necessary. +/// Obviously this could lead to issues if the cache gets out of date with the +/// file system, such as if the user manually deletes a symlink. +struct SetupCache { + file_path: PathBuf, + previous: Option<SetupCacheData>, + current: SetupCacheData, +} + +impl SetupCache { + pub fn load(file_path: PathBuf) -> Self { + let previous = std::fs::read(&file_path) + .ok() + .and_then(|data| bincode::deserialize(&data).ok()); + Self { + file_path, + previous, + current: Default::default(), + } + } + + pub fn save(&self) -> bool { + if let Some(previous) = &self.previous { + if previous == &self.current { + return false; // nothing to save + } + } + + bincode::serialize(&self.current).ok().and_then(|data| { + atomic_write_file(&self.file_path, data, CACHE_PERM).ok() + }); + true + } + + /// Inserts and checks for the existence of a root symlink + /// at `node_modules/<package_name>` pointing to + /// `node_modules/.deno/<package_id>/` + pub fn insert_root_symlink( + &mut self, + name: &str, + target_folder_name: &str, + ) -> bool { + self + .current + .root_symlinks + .insert(name.to_string(), target_folder_name.to_string()); + if let Some(previous_target) = self + .previous + .as_ref() + .and_then(|p| p.root_symlinks.get(name)) + { + previous_target != target_folder_name + } else { + true + } + } + + /// Inserts and checks for the existence of a symlink at + /// `node_modules/.deno/node_modules/<package_name>` pointing to + /// `node_modules/.deno/<package_id>/` + pub fn insert_deno_symlink( + &mut self, + name: &str, + target_folder_name: &str, + ) -> bool { + self + .current + .deno_symlinks + .insert(name.to_string(), target_folder_name.to_string()); + if let Some(previous_target) = self + .previous + .as_ref() + .and_then(|p| p.deno_symlinks.get(name)) + { + previous_target != target_folder_name + } else { + true + } + } + + pub fn remove_dep(&mut self, parent_name: &str) { + if let Some(previous) = &mut self.previous { + previous.dep_symlinks.remove(parent_name); + } + } + + pub fn with_dep(&mut self, parent_name: &str) -> SetupCacheDep<'_> { + SetupCacheDep { + previous: self + .previous + .as_ref() + .and_then(|p| p.dep_symlinks.get(parent_name)), + current: self + .current + .dep_symlinks + .entry(parent_name.to_string()) + .or_default(), + } + } +} + fn get_package_folder_id_folder_name( folder_id: &NpmPackageCacheFolderId, ) -> String { @@ -574,6 +723,7 @@ fn join_package_name(path: &Path, package_name: &str) -> PathBuf { mod test { use deno_npm::NpmPackageCacheFolderId; use deno_semver::npm::NpmPackageNv; + use test_util::TempDir; use super::*; @@ -607,4 +757,33 @@ mod test { assert_eq!(folder_id, input); } } + + #[test] + fn test_setup_cache() { + let temp_dir = TempDir::new(); + let cache_bin_path = temp_dir.path().join("cache.bin").to_path_buf(); + let mut cache = SetupCache::load(cache_bin_path.clone()); + assert!(cache.insert_deno_symlink("package-a", "package-a@1.0.0")); + assert!(cache.insert_root_symlink("package-a", "package-a@1.0.0")); + assert!(cache + .with_dep("package-a") + .insert("package-b", "package-b@1.0.0")); + assert!(cache.save()); + + let mut cache = SetupCache::load(cache_bin_path.clone()); + assert!(!cache.insert_deno_symlink("package-a", "package-a@1.0.0")); + assert!(!cache.insert_root_symlink("package-a", "package-a@1.0.0")); + assert!(!cache + .with_dep("package-a") + .insert("package-b", "package-b@1.0.0")); + assert!(!cache.save()); + assert!(cache.insert_root_symlink("package-b", "package-b@0.2.0")); + assert!(cache.save()); + + let mut cache = SetupCache::load(cache_bin_path); + cache.remove_dep("package-a"); + assert!(cache + .with_dep("package-a") + .insert("package-b", "package-b@1.0.0")); + } } |