summaryrefslogtreecommitdiff
path: root/cli/npm/cache.rs
diff options
context:
space:
mode:
authorDavid Sherret <dsherret@users.noreply.github.com>2022-11-08 14:17:24 -0500
committerGitHub <noreply@github.com>2022-11-08 14:17:24 -0500
commitcbb3f854332c348bb253e1284f7dcd7287bdf28d (patch)
tree93e2db9439bd745d48118a931bdc8bea61b81af5 /cli/npm/cache.rs
parent2c72e8d5f45f12948310c1f0e1e2ed4f1d80fb51 (diff)
feat(unstable/npm): support peer dependencies (#16561)
This adds support for peer dependencies in npm packages. 1. If not found higher in the tree (ancestor and ancestor siblings), peer dependencies are resolved like a dependency similar to npm 7. 2. Optional peer dependencies are only resolved if found higher in the tree. 3. This creates "copy packages" or duplicates of a package when a package has different resolution due to peer dependency resolution—see https://pnpm.io/how-peers-are-resolved. Unlike pnpm though, duplicates of packages will have `_1`, `_2`, etc. added to the end of the package version in the directory in order to minimize the chance of hitting the max file path limit on Windows. This is done for both the local "node_modules" directory and also the global npm cache. The files are hard linked in this case to reduce hard drive space. This is a first pass and the code is definitely more inefficient than it could be. Closes #15823
Diffstat (limited to 'cli/npm/cache.rs')
-rw-r--r--cli/npm/cache.rs336
1 files changed, 238 insertions, 98 deletions
diff --git a/cli/npm/cache.rs b/cli/npm/cache.rs
index 6a0d72b3a..2d983fa06 100644
--- a/cli/npm/cache.rs
+++ b/cli/npm/cache.rs
@@ -21,7 +21,6 @@ use crate::progress_bar::ProgressBar;
use super::registry::NpmPackageVersionDistInfo;
use super::semver::NpmVersion;
use super::tarball::verify_and_extract_tarball;
-use super::NpmPackageId;
/// For some of the tests, we want downloading of packages
/// to be deterministic so that the output is always the same
@@ -29,7 +28,107 @@ pub fn should_sync_download() -> bool {
std::env::var("DENO_UNSTABLE_NPM_SYNC_DOWNLOAD") == Ok("1".to_string())
}
-pub const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";
+const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";
+
+pub fn with_folder_sync_lock(
+ package: (&str, &NpmVersion),
+ output_folder: &Path,
+ action: impl FnOnce() -> Result<(), AnyError>,
+) -> Result<(), AnyError> {
+ fn inner(
+ output_folder: &Path,
+ action: impl FnOnce() -> Result<(), AnyError>,
+ ) -> Result<(), AnyError> {
+ fs::create_dir_all(output_folder).with_context(|| {
+ format!("Error creating '{}'.", output_folder.display())
+ })?;
+
+ // This sync lock file is a way to ensure that partially created
+ // npm package directories aren't considered valid. This could maybe
+ // be a bit smarter in the future to not bother extracting here
+ // if another process has taken the lock in the past X seconds and
+ // wait for the other process to finish (it could try to create the
+ // file with `create_new(true)` then if it exists, check the metadata
+ // then wait until the other process finishes with a timeout), but
+ // for now this is good enough.
+ let sync_lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME);
+ match fs::OpenOptions::new()
+ .write(true)
+ .create(true)
+ .open(&sync_lock_path)
+ {
+ Ok(_) => {
+ action()?;
+ // extraction succeeded, so only now delete this file
+ let _ignore = std::fs::remove_file(&sync_lock_path);
+ Ok(())
+ }
+ Err(err) => {
+ bail!(
+ concat!(
+ "Error creating package sync lock file at '{}'. ",
+ "Maybe try manually deleting this folder.\n\n{:#}",
+ ),
+ output_folder.display(),
+ err
+ );
+ }
+ }
+ }
+
+ match inner(output_folder, action) {
+ Ok(()) => Ok(()),
+ Err(err) => {
+ if let Err(remove_err) = fs::remove_dir_all(&output_folder) {
+ if remove_err.kind() != std::io::ErrorKind::NotFound {
+ bail!(
+ concat!(
+ "Failed setting up package cache directory for {}@{}, then ",
+ "failed cleaning it up.\n\nOriginal error:\n\n{}\n\n",
+ "Remove error:\n\n{}\n\nPlease manually ",
+ "delete this folder or you will run into issues using this ",
+ "package in the future:\n\n{}"
+ ),
+ package.0,
+ package.1,
+ err,
+ remove_err,
+ output_folder.display(),
+ );
+ }
+ }
+ Err(err)
+ }
+ }
+}
+
+pub struct NpmPackageCacheFolderId {
+ pub name: String,
+ pub version: NpmVersion,
+ /// Peer dependency resolution may require us to have duplicate copies
+ /// of the same package.
+ pub copy_index: usize,
+}
+
+impl NpmPackageCacheFolderId {
+ pub fn with_no_count(&self) -> Self {
+ Self {
+ name: self.name.clone(),
+ version: self.version.clone(),
+ copy_index: 0,
+ }
+ }
+}
+
+impl std::fmt::Display for NpmPackageCacheFolderId {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}@{}", self.name, self.version)?;
+ if self.copy_index > 0 {
+ write!(f, "_{}", self.copy_index)?;
+ }
+ Ok(())
+ }
+}
#[derive(Clone, Debug)]
pub struct ReadonlyNpmCache {
@@ -78,32 +177,49 @@ impl ReadonlyNpmCache {
Self::new(dir.root.join("npm"))
}
- pub fn package_folder(
+ pub fn package_folder_for_id(
&self,
- id: &NpmPackageId,
+ id: &NpmPackageCacheFolderId,
+ registry_url: &Url,
+ ) -> PathBuf {
+ if id.copy_index == 0 {
+ self.package_folder_for_name_and_version(
+ &id.name,
+ &id.version,
+ registry_url,
+ )
+ } else {
+ self
+ .package_name_folder(&id.name, registry_url)
+ .join(format!("{}_{}", id.version, id.copy_index))
+ }
+ }
+
+ pub fn package_folder_for_name_and_version(
+ &self,
+ name: &str,
+ version: &NpmVersion,
registry_url: &Url,
) -> PathBuf {
self
- .package_name_folder(&id.name, registry_url)
- .join(id.version.to_string())
+ .package_name_folder(name, registry_url)
+ .join(version.to_string())
}
pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf {
let mut dir = self.registry_folder(registry_url);
- let mut parts = name.split('/').map(Cow::Borrowed).collect::<Vec<_>>();
- // package names were not always enforced to be lowercase and so we need
- // to ensure package names, which are therefore case sensitive, are stored
- // on a case insensitive file system to not have conflicts. We do this by
- // first putting it in a "_" folder then hashing the package name.
+ let parts = name.split('/').map(Cow::Borrowed).collect::<Vec<_>>();
if name.to_lowercase() != name {
- let last_part = parts.last_mut().unwrap();
- *last_part = Cow::Owned(crate::checksum::gen(&[last_part.as_bytes()]));
- // We can't just use the hash as part of the directory because it may
- // have a collision with an actual package name in case someone wanted
- // to name an actual package that. To get around this, put all these
- // in a folder called "_" since npm packages can't start with an underscore
- // and there is no package currently called just "_".
- dir = dir.join("_");
+ // Lowercase package names introduce complications.
+ // When implementing this ensure:
+ // 1. It works on case insensitive filesystems. ex. JSON should not
+ // conflict with json... yes you read that right, those are separate
+ // packages.
+ // 2. We can figure out the package id from the path. This is used
+ // in resolve_package_id_from_specifier
+ // Probably use a hash of the package name at `npm/-/<hash>` then create
+ // a mapping for these package names.
+ todo!("deno currently doesn't support npm package names that are not all lowercase");
}
// ensure backslashes are used on windows
for part in parts {
@@ -118,23 +234,24 @@ impl ReadonlyNpmCache {
.join(fs_util::root_url_to_safe_local_dirname(registry_url))
}
- pub fn resolve_package_id_from_specifier(
+ pub fn resolve_package_folder_id_from_specifier(
&self,
specifier: &ModuleSpecifier,
registry_url: &Url,
- ) -> Result<NpmPackageId, AnyError> {
- match self.maybe_resolve_package_id_from_specifier(specifier, registry_url)
+ ) -> Result<NpmPackageCacheFolderId, AnyError> {
+ match self
+ .maybe_resolve_package_folder_id_from_specifier(specifier, registry_url)
{
Some(id) => Ok(id),
None => bail!("could not find npm package for '{}'", specifier),
}
}
- fn maybe_resolve_package_id_from_specifier(
+ fn maybe_resolve_package_folder_id_from_specifier(
&self,
specifier: &ModuleSpecifier,
registry_url: &Url,
- ) -> Option<NpmPackageId> {
+ ) -> Option<NpmPackageCacheFolderId> {
let registry_root_dir = self
.root_dir_url
.join(&format!(
@@ -153,6 +270,7 @@ impl ReadonlyNpmCache {
// examples:
// * chalk/5.0.1/
// * @types/chalk/5.0.1/
+ // * some-package/5.0.1_1/ -- where the `_1` (/_\d+/) is a copy of the folder for peer deps
let is_scoped_package = relative_url.starts_with('@');
let mut parts = relative_url
.split('/')
@@ -163,11 +281,19 @@ impl ReadonlyNpmCache {
if parts.len() < 2 {
return None;
}
- let version = parts.pop().unwrap();
+ let version_part = parts.pop().unwrap();
let name = parts.join("/");
- NpmVersion::parse(version)
- .ok()
- .map(|version| NpmPackageId { name, version })
+ let (version, copy_index) =
+ if let Some((version, copy_count)) = version_part.split_once('_') {
+ (version, copy_count.parse::<usize>().ok()?)
+ } else {
+ (version_part, 0)
+ };
+ Some(NpmPackageCacheFolderId {
+ name,
+ version: NpmVersion::parse(version).ok()?,
+ copy_index,
+ })
}
pub fn get_cache_location(&self) -> PathBuf {
@@ -202,28 +328,38 @@ impl NpmCache {
pub async fn ensure_package(
&self,
- id: &NpmPackageId,
+ package: (&str, &NpmVersion),
dist: &NpmPackageVersionDistInfo,
registry_url: &Url,
) -> Result<(), AnyError> {
self
- .ensure_package_inner(id, dist, registry_url)
+ .ensure_package_inner(package, dist, registry_url)
.await
- .with_context(|| format!("Failed caching npm package '{}'.", id))
+ .with_context(|| {
+ format!("Failed caching npm package '{}@{}'.", package.0, package.1)
+ })
+ }
+
+ pub fn should_use_cache_for_npm_package(&self, package_name: &str) -> bool {
+ self.cache_setting.should_use_for_npm_package(package_name)
}
async fn ensure_package_inner(
&self,
- id: &NpmPackageId,
+ package: (&str, &NpmVersion),
dist: &NpmPackageVersionDistInfo,
registry_url: &Url,
) -> Result<(), AnyError> {
- let package_folder = self.readonly.package_folder(id, registry_url);
+ let package_folder = self.readonly.package_folder_for_name_and_version(
+ package.0,
+ package.1,
+ registry_url,
+ );
if package_folder.exists()
// if this file exists, then the package didn't successfully extract
// the first time, or another process is currently extracting the zip file
&& !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
- && self.cache_setting.should_use_for_npm_package(&id.name)
+ && self.should_use_cache_for_npm_package(package.0)
{
return Ok(());
} else if self.cache_setting == CacheSetting::Only {
@@ -231,7 +367,7 @@ impl NpmCache {
"NotCached",
format!(
"An npm specifier not found in cache: \"{}\", --cached-only is specified.",
- id.name
+ &package.0
)
)
);
@@ -256,38 +392,66 @@ impl NpmCache {
} else {
let bytes = response.bytes().await?;
- match verify_and_extract_tarball(id, &bytes, dist, &package_folder) {
- Ok(()) => Ok(()),
- Err(err) => {
- if let Err(remove_err) = fs::remove_dir_all(&package_folder) {
- if remove_err.kind() != std::io::ErrorKind::NotFound {
- bail!(
- concat!(
- "Failed verifying and extracting npm tarball for {}, then ",
- "failed cleaning up package cache folder.\n\nOriginal ",
- "error:\n\n{}\n\nRemove error:\n\n{}\n\nPlease manually ",
- "delete this folder or you will run into issues using this ",
- "package in the future:\n\n{}"
- ),
- id,
- err,
- remove_err,
- package_folder.display(),
- );
- }
- }
- Err(err)
- }
- }
+ verify_and_extract_tarball(package, &bytes, dist, &package_folder)
}
}
- pub fn package_folder(
+ /// Ensures a copy of the package exists in the global cache.
+ ///
+ /// This assumes that the original package folder being hard linked
+ /// from exists before this is called.
+ pub fn ensure_copy_package(
&self,
- id: &NpmPackageId,
+ id: &NpmPackageCacheFolderId,
+ registry_url: &Url,
+ ) -> Result<(), AnyError> {
+ assert_ne!(id.copy_index, 0);
+ let package_folder = self.readonly.package_folder_for_id(id, registry_url);
+
+ if package_folder.exists()
+ // if this file exists, then the package didn't successfully extract
+ // the first time, or another process is currently extracting the zip file
+ && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
+ && self.cache_setting.should_use_for_npm_package(&id.name)
+ {
+ return Ok(());
+ }
+
+ let original_package_folder = self
+ .readonly
+ .package_folder_for_name_and_version(&id.name, &id.version, registry_url);
+ with_folder_sync_lock(
+ (id.name.as_str(), &id.version),
+ &package_folder,
+ || {
+ fs_util::hard_link_dir_recursive(
+ &original_package_folder,
+ &package_folder,
+ )
+ },
+ )?;
+ Ok(())
+ }
+
+ pub fn package_folder_for_id(
+ &self,
+ id: &NpmPackageCacheFolderId,
+ registry_url: &Url,
+ ) -> PathBuf {
+ self.readonly.package_folder_for_id(id, registry_url)
+ }
+
+ pub fn package_folder_for_name_and_version(
+ &self,
+ name: &str,
+ version: &NpmVersion,
registry_url: &Url,
) -> PathBuf {
- self.readonly.package_folder(id, registry_url)
+ self.readonly.package_folder_for_name_and_version(
+ name,
+ version,
+ registry_url,
+ )
}
pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf {
@@ -298,14 +462,14 @@ impl NpmCache {
self.readonly.registry_folder(registry_url)
}
- pub fn resolve_package_id_from_specifier(
+ pub fn resolve_package_folder_id_from_specifier(
&self,
specifier: &ModuleSpecifier,
registry_url: &Url,
- ) -> Result<NpmPackageId, AnyError> {
+ ) -> Result<NpmPackageCacheFolderId, AnyError> {
self
.readonly
- .resolve_package_id_from_specifier(specifier, registry_url)
+ .resolve_package_folder_id_from_specifier(specifier, registry_url)
}
}
@@ -314,8 +478,8 @@ mod test {
use deno_core::url::Url;
use super::ReadonlyNpmCache;
+ use crate::npm::cache::NpmPackageCacheFolderId;
use crate::npm::semver::NpmVersion;
- use crate::npm::NpmPackageId;
#[test]
fn should_get_lowercase_package_folder() {
@@ -323,12 +487,12 @@ mod test {
let cache = ReadonlyNpmCache::new(root_dir.clone());
let registry_url = Url::parse("https://registry.npmjs.org/").unwrap();
- // all lowercase should be as-is
assert_eq!(
- cache.package_folder(
- &NpmPackageId {
+ cache.package_folder_for_id(
+ &NpmPackageCacheFolderId {
name: "json".to_string(),
version: NpmVersion::parse("1.2.5").unwrap(),
+ copy_index: 0,
},
&registry_url,
),
@@ -337,44 +501,20 @@ mod test {
.join("json")
.join("1.2.5"),
);
- }
- #[test]
- fn should_handle_non_all_lowercase_package_names() {
- // it was possible at one point for npm packages to not just be lowercase
- let root_dir = crate::deno_dir::DenoDir::new(None).unwrap().root;
- let cache = ReadonlyNpmCache::new(root_dir.clone());
- let registry_url = Url::parse("https://registry.npmjs.org/").unwrap();
- let json_uppercase_hash =
- "db1a21a0bc2ef8fbe13ac4cf044e8c9116d29137d5ed8b916ab63dcb2d4290df";
assert_eq!(
- cache.package_folder(
- &NpmPackageId {
- name: "JSON".to_string(),
- version: NpmVersion::parse("1.2.5").unwrap(),
- },
- &registry_url,
- ),
- root_dir
- .join("registry.npmjs.org")
- .join("_")
- .join(json_uppercase_hash)
- .join("1.2.5"),
- );
- assert_eq!(
- cache.package_folder(
- &NpmPackageId {
- name: "@types/JSON".to_string(),
+ cache.package_folder_for_id(
+ &NpmPackageCacheFolderId {
+ name: "json".to_string(),
version: NpmVersion::parse("1.2.5").unwrap(),
+ copy_index: 1,
},
&registry_url,
),
root_dir
.join("registry.npmjs.org")
- .join("_")
- .join("@types")
- .join(json_uppercase_hash)
- .join("1.2.5"),
+ .join("json")
+ .join("1.2.5_1"),
);
}
}