Diffstat (limited to 'cli/npm/cache.rs')
-rw-r--r-- | cli/npm/cache.rs | 336 |
1 file changed, 238 insertions, 98 deletions
diff --git a/cli/npm/cache.rs b/cli/npm/cache.rs
index 6a0d72b3a..2d983fa06 100644
--- a/cli/npm/cache.rs
+++ b/cli/npm/cache.rs
@@ -21,7 +21,6 @@ use crate::progress_bar::ProgressBar;
 use super::registry::NpmPackageVersionDistInfo;
 use super::semver::NpmVersion;
 use super::tarball::verify_and_extract_tarball;
-use super::NpmPackageId;
 
 /// For some of the tests, we want downloading of packages
 /// to be deterministic so that the output is always the same
@@ -29,7 +28,107 @@ pub fn should_sync_download() -> bool {
   std::env::var("DENO_UNSTABLE_NPM_SYNC_DOWNLOAD") == Ok("1".to_string())
 }
 
-pub const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";
+const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";
+
+pub fn with_folder_sync_lock(
+  package: (&str, &NpmVersion),
+  output_folder: &Path,
+  action: impl FnOnce() -> Result<(), AnyError>,
+) -> Result<(), AnyError> {
+  fn inner(
+    output_folder: &Path,
+    action: impl FnOnce() -> Result<(), AnyError>,
+  ) -> Result<(), AnyError> {
+    fs::create_dir_all(output_folder).with_context(|| {
+      format!("Error creating '{}'.", output_folder.display())
+    })?;
+
+    // This sync lock file is a way to ensure that partially created
+    // npm package directories aren't considered valid. This could maybe
+    // be a bit smarter in the future to not bother extracting here
+    // if another process has taken the lock in the past X seconds and
+    // wait for the other process to finish (it could try to create the
+    // file with `create_new(true)` then if it exists, check the metadata
+    // then wait until the other process finishes with a timeout), but
+    // for now this is good enough.
+    let sync_lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME);
+    match fs::OpenOptions::new()
+      .write(true)
+      .create(true)
+      .open(&sync_lock_path)
+    {
+      Ok(_) => {
+        action()?;
+        // extraction succeeded, so only now delete this file
+        let _ignore = std::fs::remove_file(&sync_lock_path);
+        Ok(())
+      }
+      Err(err) => {
+        bail!(
+          concat!(
+            "Error creating package sync lock file at '{}'. ",
+            "Maybe try manually deleting this folder.\n\n{:#}",
+          ),
+          output_folder.display(),
+          err
+        );
+      }
+    }
+  }
+
+  match inner(output_folder, action) {
+    Ok(()) => Ok(()),
+    Err(err) => {
+      if let Err(remove_err) = fs::remove_dir_all(&output_folder) {
+        if remove_err.kind() != std::io::ErrorKind::NotFound {
+          bail!(
+            concat!(
+              "Failed setting up package cache directory for {}@{}, then ",
+              "failed cleaning it up.\n\nOriginal error:\n\n{}\n\n",
+              "Remove error:\n\n{}\n\nPlease manually ",
+              "delete this folder or you will run into issues using this ",
+              "package in the future:\n\n{}"
+            ),
+            package.0,
+            package.1,
+            err,
+            remove_err,
+            output_folder.display(),
+          );
+        }
+      }
+      Err(err)
+    }
+  }
+}
+
+pub struct NpmPackageCacheFolderId {
+  pub name: String,
+  pub version: NpmVersion,
+  /// Peer dependency resolution may require us to have duplicate copies
+  /// of the same package.
+  pub copy_index: usize,
+}
+
+impl NpmPackageCacheFolderId {
+  pub fn with_no_count(&self) -> Self {
+    Self {
+      name: self.name.clone(),
+      version: self.version.clone(),
+      copy_index: 0,
+    }
+  }
+}
+
+impl std::fmt::Display for NpmPackageCacheFolderId {
+  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    write!(f, "{}@{}", self.name, self.version)?;
+    if self.copy_index > 0 {
+      write!(f, "_{}", self.copy_index)?;
+    }
+    Ok(())
+  }
+}
 
 #[derive(Clone, Debug)]
 pub struct ReadonlyNpmCache {
@@ -78,32 +177,49 @@ impl ReadonlyNpmCache {
     Self::new(dir.root.join("npm"))
   }
 
-  pub fn package_folder(
+  pub fn package_folder_for_id(
     &self,
-    id: &NpmPackageId,
+    id: &NpmPackageCacheFolderId,
+    registry_url: &Url,
+  ) -> PathBuf {
+    if id.copy_index == 0 {
+      self.package_folder_for_name_and_version(
+        &id.name,
+        &id.version,
+        registry_url,
+      )
+    } else {
+      self
+        .package_name_folder(&id.name, registry_url)
+        .join(format!("{}_{}", id.version, id.copy_index))
+    }
+  }
+
+  pub fn package_folder_for_name_and_version(
+    &self,
+    name: &str,
+    version: &NpmVersion,
     registry_url: &Url,
   ) -> PathBuf {
     self
-      .package_name_folder(&id.name, registry_url)
-      .join(id.version.to_string())
+      .package_name_folder(name, registry_url)
+      .join(version.to_string())
   }
 
   pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf {
     let mut dir = self.registry_folder(registry_url);
-    let mut parts = name.split('/').map(Cow::Borrowed).collect::<Vec<_>>();
-    // package names were not always enforced to be lowercase and so we need
-    // to ensure package names, which are therefore case sensitive, are stored
-    // on a case insensitive file system to not have conflicts. We do this by
-    // first putting it in a "_" folder then hashing the package name.
+    let parts = name.split('/').map(Cow::Borrowed).collect::<Vec<_>>();
     if name.to_lowercase() != name {
-      let last_part = parts.last_mut().unwrap();
-      *last_part = Cow::Owned(crate::checksum::gen(&[last_part.as_bytes()]));
-      // We can't just use the hash as part of the directory because it may
-      // have a collision with an actual package name in case someone wanted
-      // to name an actual package that. To get around this, put all these
-      // in a folder called "_" since npm packages can't start with an underscore
-      // and there is no package currently called just "_".
-      dir = dir.join("_");
+      // Lowercase package names introduce complications.
+      // When implementing this ensure:
+      // 1. It works on case insensitive filesystems. ex. JSON should not
+      //    conflict with json... yes you read that right, those are separate
+      //    packages.
+      // 2. We can figure out the package id from the path. This is used
+      //    in resolve_package_id_from_specifier
+      // Probably use a hash of the package name at `npm/-/<hash>` then create
+      // a mapping for these package names.
+      todo!("deno currently doesn't support npm package names that are not all lowercase");
     }
     // ensure backslashes are used on windows
     for part in parts {
@@ -118,23 +234,24 @@ impl ReadonlyNpmCache {
       .join(fs_util::root_url_to_safe_local_dirname(registry_url))
   }
 
-  pub fn resolve_package_id_from_specifier(
+  pub fn resolve_package_folder_id_from_specifier(
     &self,
     specifier: &ModuleSpecifier,
     registry_url: &Url,
-  ) -> Result<NpmPackageId, AnyError> {
-    match self.maybe_resolve_package_id_from_specifier(specifier, registry_url)
+  ) -> Result<NpmPackageCacheFolderId, AnyError> {
+    match self
+      .maybe_resolve_package_folder_id_from_specifier(specifier, registry_url)
     {
       Some(id) => Ok(id),
       None => bail!("could not find npm package for '{}'", specifier),
     }
   }
 
-  fn maybe_resolve_package_id_from_specifier(
+  fn maybe_resolve_package_folder_id_from_specifier(
     &self,
     specifier: &ModuleSpecifier,
     registry_url: &Url,
-  ) -> Option<NpmPackageId> {
+  ) -> Option<NpmPackageCacheFolderId> {
     let registry_root_dir = self
       .root_dir_url
       .join(&format!(
@@ -153,6 +270,7 @@ impl ReadonlyNpmCache {
     // examples:
     // * chalk/5.0.1/
     // * @types/chalk/5.0.1/
+    // * some-package/5.0.1_1/ -- where the `_1` (/_\d+/) is a copy of the folder for peer deps
     let is_scoped_package = relative_url.starts_with('@');
     let mut parts = relative_url
       .split('/')
@@ -163,11 +281,19 @@ impl ReadonlyNpmCache {
     if parts.len() < 2 {
       return None;
     }
-    let version = parts.pop().unwrap();
+    let version_part = parts.pop().unwrap();
     let name = parts.join("/");
-    NpmVersion::parse(version)
-      .ok()
-      .map(|version| NpmPackageId { name, version })
+    let (version, copy_index) =
+      if let Some((version, copy_count)) = version_part.split_once('_') {
+        (version, copy_count.parse::<usize>().ok()?)
+      } else {
+        (version_part, 0)
+      };
+    Some(NpmPackageCacheFolderId {
+      name,
+      version: NpmVersion::parse(version).ok()?,
+      copy_index,
+    })
   }
 
   pub fn get_cache_location(&self) -> PathBuf {
@@ -202,28 +328,38 @@ impl NpmCache {
 
   pub async fn ensure_package(
     &self,
-    id: &NpmPackageId,
+    package: (&str, &NpmVersion),
     dist: &NpmPackageVersionDistInfo,
     registry_url: &Url,
   ) -> Result<(), AnyError> {
     self
-      .ensure_package_inner(id, dist, registry_url)
+      .ensure_package_inner(package, dist, registry_url)
       .await
-      .with_context(|| format!("Failed caching npm package '{}'.", id))
+      .with_context(|| {
+        format!("Failed caching npm package '{}@{}'.", package.0, package.1)
+      })
+  }
+
+  pub fn should_use_cache_for_npm_package(&self, package_name: &str) -> bool {
+    self.cache_setting.should_use_for_npm_package(package_name)
   }
 
   async fn ensure_package_inner(
     &self,
-    id: &NpmPackageId,
+    package: (&str, &NpmVersion),
     dist: &NpmPackageVersionDistInfo,
     registry_url: &Url,
   ) -> Result<(), AnyError> {
-    let package_folder = self.readonly.package_folder(id, registry_url);
+    let package_folder = self.readonly.package_folder_for_name_and_version(
+      package.0,
+      package.1,
+      registry_url,
+    );
     if package_folder.exists()
       // if this file exists, then the package didn't successfully extract
       // the first time, or another process is currently extracting the zip file
       && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
-      && self.cache_setting.should_use_for_npm_package(&id.name)
+      && self.should_use_cache_for_npm_package(package.0)
     {
       return Ok(());
     } else if self.cache_setting == CacheSetting::Only {
@@ -231,7 +367,7 @@ impl NpmCache {
         "NotCached",
         format!(
           "An npm specifier not found in cache: \"{}\", --cached-only is specified.",
-          id.name
+          &package.0
         )
       )
     );
@@ -256,38 +392,66 @@ impl NpmCache {
     } else {
       let bytes = response.bytes().await?;
 
-      match verify_and_extract_tarball(id, &bytes, dist, &package_folder) {
-        Ok(()) => Ok(()),
-        Err(err) => {
-          if let Err(remove_err) = fs::remove_dir_all(&package_folder) {
-            if remove_err.kind() != std::io::ErrorKind::NotFound {
-              bail!(
-                concat!(
-                  "Failed verifying and extracting npm tarball for {}, then ",
-                  "failed cleaning up package cache folder.\n\nOriginal ",
-                  "error:\n\n{}\n\nRemove error:\n\n{}\n\nPlease manually ",
-                  "delete this folder or you will run into issues using this ",
-                  "package in the future:\n\n{}"
-                ),
-                id,
-                err,
-                remove_err,
-                package_folder.display(),
-              );
-            }
-          }
-          Err(err)
-        }
-      }
+      verify_and_extract_tarball(package, &bytes, dist, &package_folder)
     }
   }
 
-  pub fn package_folder(
+  /// Ensures a copy of the package exists in the global cache.
+  ///
+  /// This assumes that the original package folder being hard linked
+  /// from exists before this is called.
+  pub fn ensure_copy_package(
     &self,
-    id: &NpmPackageId,
+    id: &NpmPackageCacheFolderId,
+    registry_url: &Url,
+  ) -> Result<(), AnyError> {
+    assert_ne!(id.copy_index, 0);
+    let package_folder = self.readonly.package_folder_for_id(id, registry_url);
+
+    if package_folder.exists()
+      // if this file exists, then the package didn't successfully extract
+      // the first time, or another process is currently extracting the zip file
+      && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
+      && self.cache_setting.should_use_for_npm_package(&id.name)
+    {
+      return Ok(());
+    }
+
+    let original_package_folder = self
+      .readonly
+      .package_folder_for_name_and_version(&id.name, &id.version, registry_url);
+    with_folder_sync_lock(
+      (id.name.as_str(), &id.version),
+      &package_folder,
+      || {
+        fs_util::hard_link_dir_recursive(
+          &original_package_folder,
+          &package_folder,
+        )
+      },
+    )?;
+    Ok(())
+  }
+
+  pub fn package_folder_for_id(
+    &self,
+    id: &NpmPackageCacheFolderId,
+    registry_url: &Url,
+  ) -> PathBuf {
+    self.readonly.package_folder_for_id(id, registry_url)
+  }
+
+  pub fn package_folder_for_name_and_version(
+    &self,
+    name: &str,
+    version: &NpmVersion,
     registry_url: &Url,
   ) -> PathBuf {
-    self.readonly.package_folder(id, registry_url)
+    self.readonly.package_folder_for_name_and_version(
+      name,
+      version,
+      registry_url,
+    )
   }
 
   pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf {
@@ -298,14 +462,14 @@ impl NpmCache {
     self.readonly.registry_folder(registry_url)
   }
 
-  pub fn resolve_package_id_from_specifier(
+  pub fn resolve_package_folder_id_from_specifier(
     &self,
     specifier: &ModuleSpecifier,
     registry_url: &Url,
-  ) -> Result<NpmPackageId, AnyError> {
+  ) -> Result<NpmPackageCacheFolderId, AnyError> {
     self
       .readonly
-      .resolve_package_id_from_specifier(specifier, registry_url)
+      .resolve_package_folder_id_from_specifier(specifier, registry_url)
   }
 }
 
@@ -314,8 +478,8 @@ mod test {
   use deno_core::url::Url;
 
   use super::ReadonlyNpmCache;
+  use crate::npm::cache::NpmPackageCacheFolderId;
   use crate::npm::semver::NpmVersion;
-  use crate::npm::NpmPackageId;
 
   #[test]
   fn should_get_lowercase_package_folder() {
@@ -323,12 +487,12 @@ mod test {
     let cache = ReadonlyNpmCache::new(root_dir.clone());
     let registry_url = Url::parse("https://registry.npmjs.org/").unwrap();
 
-    // all lowercase should be as-is
     assert_eq!(
-      cache.package_folder(
-        &NpmPackageId {
+      cache.package_folder_for_id(
+        &NpmPackageCacheFolderId {
           name: "json".to_string(),
           version: NpmVersion::parse("1.2.5").unwrap(),
+          copy_index: 0,
         },
         &registry_url,
      ),
@@ -337,44 +501,20 @@ mod test {
       .join("json")
       .join("1.2.5"),
     );
-  }
 
-  #[test]
-  fn should_handle_non_all_lowercase_package_names() {
-    // it was possible at one point for npm packages to not just be lowercase
-    let root_dir = crate::deno_dir::DenoDir::new(None).unwrap().root;
-    let cache = ReadonlyNpmCache::new(root_dir.clone());
-    let registry_url = Url::parse("https://registry.npmjs.org/").unwrap();
-    let json_uppercase_hash =
-      "db1a21a0bc2ef8fbe13ac4cf044e8c9116d29137d5ed8b916ab63dcb2d4290df";
     assert_eq!(
-      cache.package_folder(
-        &NpmPackageId {
-          name: "JSON".to_string(),
-          version: NpmVersion::parse("1.2.5").unwrap(),
-        },
-        &registry_url,
-      ),
-      root_dir
-        .join("registry.npmjs.org")
-        .join("_")
-        .join(json_uppercase_hash)
-        .join("1.2.5"),
-    );
-    assert_eq!(
-      cache.package_folder(
-        &NpmPackageId {
-          name: "@types/JSON".to_string(),
+      cache.package_folder_for_id(
+        &NpmPackageCacheFolderId {
+          name: "json".to_string(),
           version: NpmVersion::parse("1.2.5").unwrap(),
+          copy_index: 1,
         },
        &registry_url,
      ),
      root_dir
        .join("registry.npmjs.org")
-        .join("_")
-        .join("@types")
-        .join(json_uppercase_hash)
-        .join("1.2.5"),
+        .join("json")
+        .join("1.2.5_1"),
    );
  }
}
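
Note on the new cache layout (illustrative, not part of the diff): copy 0 of a package lives at <registry>/<name>/<version> and copy N > 0 at <registry>/<name>/<version>_<N>, and maybe_resolve_package_folder_id_from_specifier recovers the copy index by splitting the folder name on the first underscore. Below is a minimal standalone Rust sketch of that naming convention; the helper names are hypothetical and a plain &str stands in for NpmVersion.

// Illustrative sketch only; these helpers do not exist in cli/npm/cache.rs.

/// Parse a cache folder name such as "1.2.5" or "1.2.5_1" into
/// (version, copy_index). A name without a `_<digits>` suffix is copy 0.
fn split_version_and_copy_index(folder_name: &str) -> Option<(&str, usize)> {
  match folder_name.split_once('_') {
    Some((version, copy)) => Some((version, copy.parse::<usize>().ok()?)),
    None => Some((folder_name, 0)),
  }
}

/// Build the folder name back from a version and copy index
/// (the direction used by ReadonlyNpmCache::package_folder_for_id).
fn folder_name_for_copy(version: &str, copy_index: usize) -> String {
  if copy_index == 0 {
    version.to_string()
  } else {
    format!("{}_{}", version, copy_index)
  }
}

fn main() {
  assert_eq!(split_version_and_copy_index("1.2.5"), Some(("1.2.5", 0)));
  assert_eq!(split_version_and_copy_index("1.2.5_1"), Some(("1.2.5", 1)));
  assert_eq!(folder_name_for_copy("1.2.5", 1), "1.2.5_1");
}

This mirrors why ensure_copy_package asserts copy_index != 0: copy 0 is the originally extracted folder, while higher copy indexes are hard-linked duplicates created for peer dependency resolution.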