From 148694eb351ea3f733852b7786a3268617811e27 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Mon, 2 Oct 2023 17:53:55 -0400 Subject: refactor(npm): make `NpmCache`, `CliNpmRegistryApi`, and `NpmResolution` internal to `npm::managed` (#20764) --- cli/npm/cache.rs | 536 ------------------------------------ cli/npm/cache_dir.rs | 268 ++++++++++++++++++ cli/npm/managed/cache.rs | 278 +++++++++++++++++++ cli/npm/managed/installer.rs | 2 +- cli/npm/managed/mod.rs | 285 +++++++++++++++++-- cli/npm/managed/registry.rs | 358 ++++++++++++++++++++++++ cli/npm/managed/resolution.rs | 14 +- cli/npm/managed/resolvers/common.rs | 2 +- cli/npm/managed/resolvers/global.rs | 3 +- cli/npm/managed/resolvers/local.rs | 6 +- cli/npm/managed/resolvers/mod.rs | 5 +- cli/npm/managed/tarball.rs | 241 ++++++++++++++++ cli/npm/mod.rs | 53 +++- cli/npm/registry.rs | 381 ------------------------- cli/npm/tarball.rs | 241 ---------------- 15 files changed, 1456 insertions(+), 1217 deletions(-) delete mode 100644 cli/npm/cache.rs create mode 100644 cli/npm/cache_dir.rs create mode 100644 cli/npm/managed/cache.rs create mode 100644 cli/npm/managed/registry.rs create mode 100644 cli/npm/managed/tarball.rs delete mode 100644 cli/npm/registry.rs delete mode 100644 cli/npm/tarball.rs (limited to 'cli/npm') diff --git a/cli/npm/cache.rs b/cli/npm/cache.rs deleted file mode 100644 index f76bf6821..000000000 --- a/cli/npm/cache.rs +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. 
- -use std::collections::HashSet; -use std::fs; -use std::path::Path; -use std::path::PathBuf; -use std::sync::Arc; - -use deno_ast::ModuleSpecifier; -use deno_core::anyhow::bail; -use deno_core::anyhow::Context; -use deno_core::error::custom_error; -use deno_core::error::AnyError; -use deno_core::parking_lot::Mutex; -use deno_core::url::Url; -use deno_npm::registry::NpmPackageVersionDistInfo; -use deno_npm::NpmPackageCacheFolderId; -use deno_runtime::deno_fs; -use deno_semver::package::PackageNv; -use deno_semver::Version; - -use crate::args::CacheSetting; -use crate::http_util::HttpClient; -use crate::util::fs::canonicalize_path; -use crate::util::fs::hard_link_dir_recursive; -use crate::util::path::root_url_to_safe_local_dirname; -use crate::util::progress_bar::ProgressBar; - -use super::tarball::verify_and_extract_tarball; - -const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock"; - -pub fn with_folder_sync_lock( - package: &PackageNv, - output_folder: &Path, - action: impl FnOnce() -> Result<(), AnyError>, -) -> Result<(), AnyError> { - fn inner( - output_folder: &Path, - action: impl FnOnce() -> Result<(), AnyError>, - ) -> Result<(), AnyError> { - fs::create_dir_all(output_folder).with_context(|| { - format!("Error creating '{}'.", output_folder.display()) - })?; - - // This sync lock file is a way to ensure that partially created - // npm package directories aren't considered valid. This could maybe - // be a bit smarter in the future to not bother extracting here - // if another process has taken the lock in the past X seconds and - // wait for the other process to finish (it could try to create the - // file with `create_new(true)` then if it exists, check the metadata - // then wait until the other process finishes with a timeout), but - // for now this is good enough. 
- let sync_lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME); - match fs::OpenOptions::new() - .write(true) - .create(true) - .open(&sync_lock_path) - { - Ok(_) => { - action()?; - // extraction succeeded, so only now delete this file - let _ignore = std::fs::remove_file(&sync_lock_path); - Ok(()) - } - Err(err) => { - bail!( - concat!( - "Error creating package sync lock file at '{}'. ", - "Maybe try manually deleting this folder.\n\n{:#}", - ), - output_folder.display(), - err - ); - } - } - } - - match inner(output_folder, action) { - Ok(()) => Ok(()), - Err(err) => { - if let Err(remove_err) = fs::remove_dir_all(output_folder) { - if remove_err.kind() != std::io::ErrorKind::NotFound { - bail!( - concat!( - "Failed setting up package cache directory for {}, then ", - "failed cleaning it up.\n\nOriginal error:\n\n{}\n\n", - "Remove error:\n\n{}\n\nPlease manually ", - "delete this folder or you will run into issues using this ", - "package in the future:\n\n{}" - ), - package, - err, - remove_err, - output_folder.display(), - ); - } - } - Err(err) - } - } -} - -#[derive(Clone, Debug)] -pub struct NpmCacheDir { - root_dir: PathBuf, - // cached url representation of the root directory - root_dir_url: Url, -} - -impl NpmCacheDir { - pub fn new(root_dir: PathBuf) -> Self { - fn try_get_canonicalized_root_dir( - root_dir: &Path, - ) -> Result { - if !root_dir.exists() { - std::fs::create_dir_all(root_dir) - .with_context(|| format!("Error creating {}", root_dir.display()))?; - } - Ok(canonicalize_path(root_dir)?) 
- } - - // this may fail on readonly file systems, so just ignore if so - let root_dir = - try_get_canonicalized_root_dir(&root_dir).unwrap_or(root_dir); - let root_dir_url = Url::from_directory_path(&root_dir).unwrap(); - Self { - root_dir, - root_dir_url, - } - } - - pub fn root_dir_url(&self) -> &Url { - &self.root_dir_url - } - - pub fn package_folder_for_id( - &self, - folder_id: &NpmPackageCacheFolderId, - registry_url: &Url, - ) -> PathBuf { - if folder_id.copy_index == 0 { - self.package_folder_for_name_and_version(&folder_id.nv, registry_url) - } else { - self - .package_name_folder(&folder_id.nv.name, registry_url) - .join(format!("{}_{}", folder_id.nv.version, folder_id.copy_index)) - } - } - - pub fn package_folder_for_name_and_version( - &self, - package: &PackageNv, - registry_url: &Url, - ) -> PathBuf { - self - .package_name_folder(&package.name, registry_url) - .join(package.version.to_string()) - } - - pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf { - let mut dir = self.registry_folder(registry_url); - if name.to_lowercase() != name { - let encoded_name = mixed_case_package_name_encode(name); - // Using the encoded directory may have a collision with an actual package name - // so prefix it with an underscore since npm packages can't start with that - dir.join(format!("_{encoded_name}")) - } else { - // ensure backslashes are used on windows - for part in name.split('/') { - dir = dir.join(part); - } - dir - } - } - - pub fn registry_folder(&self, registry_url: &Url) -> PathBuf { - self - .root_dir - .join(root_url_to_safe_local_dirname(registry_url)) - } - - pub fn resolve_package_folder_id_from_specifier( - &self, - specifier: &ModuleSpecifier, - registry_url: &Url, - ) -> Option { - let registry_root_dir = self - .root_dir_url - .join(&format!( - "{}/", - root_url_to_safe_local_dirname(registry_url) - .to_string_lossy() - .replace('\\', "/") - )) - // this not succeeding indicates a fatal issue, so unwrap - 
.unwrap(); - let mut relative_url = registry_root_dir.make_relative(specifier)?; - if relative_url.starts_with("../") { - return None; - } - - // base32 decode the url if it starts with an underscore - // * Ex. _{base32(package_name)}/ - if let Some(end_url) = relative_url.strip_prefix('_') { - let mut parts = end_url - .split('/') - .map(ToOwned::to_owned) - .collect::>(); - match mixed_case_package_name_decode(&parts[0]) { - Some(part) => { - parts[0] = part; - } - None => return None, - } - relative_url = parts.join("/"); - } - - // examples: - // * chalk/5.0.1/ - // * @types/chalk/5.0.1/ - // * some-package/5.0.1_1/ -- where the `_1` (/_\d+/) is a copy of the folder for peer deps - let is_scoped_package = relative_url.starts_with('@'); - let mut parts = relative_url - .split('/') - .enumerate() - .take(if is_scoped_package { 3 } else { 2 }) - .map(|(_, part)| part) - .collect::>(); - if parts.len() < 2 { - return None; - } - let version_part = parts.pop().unwrap(); - let name = parts.join("/"); - let (version, copy_index) = - if let Some((version, copy_count)) = version_part.split_once('_') { - (version, copy_count.parse::().ok()?) - } else { - (version_part, 0) - }; - Some(NpmPackageCacheFolderId { - nv: PackageNv { - name, - version: Version::parse_from_npm(version).ok()?, - }, - copy_index, - }) - } - - pub fn get_cache_location(&self) -> PathBuf { - self.root_dir.clone() - } -} - -/// Stores a single copy of npm packages in a cache. 
-#[derive(Debug)] -pub struct NpmCache { - cache_dir: NpmCacheDir, - cache_setting: CacheSetting, - fs: Arc, - http_client: Arc, - progress_bar: ProgressBar, - /// ensures a package is only downloaded once per run - previously_reloaded_packages: Mutex>, -} - -impl NpmCache { - pub fn new( - cache_dir: NpmCacheDir, - cache_setting: CacheSetting, - fs: Arc, - http_client: Arc, - progress_bar: ProgressBar, - ) -> Self { - Self { - cache_dir, - cache_setting, - fs, - http_client, - progress_bar, - previously_reloaded_packages: Default::default(), - } - } - - pub fn as_readonly(&self) -> NpmCacheDir { - self.cache_dir.clone() - } - - pub fn cache_setting(&self) -> &CacheSetting { - &self.cache_setting - } - - pub fn root_dir_url(&self) -> &Url { - self.cache_dir.root_dir_url() - } - - /// Checks if the cache should be used for the provided name and version. - /// NOTE: Subsequent calls for the same package will always return `true` - /// to ensure a package is only downloaded once per run of the CLI. This - /// prevents downloads from re-occurring when someone has `--reload` and - /// and imports a dynamic import that imports the same package again for example. 
- fn should_use_global_cache_for_package(&self, package: &PackageNv) -> bool { - self.cache_setting.should_use_for_npm_package(&package.name) - || !self - .previously_reloaded_packages - .lock() - .insert(package.clone()) - } - - pub async fn ensure_package( - &self, - package: &PackageNv, - dist: &NpmPackageVersionDistInfo, - registry_url: &Url, - ) -> Result<(), AnyError> { - self - .ensure_package_inner(package, dist, registry_url) - .await - .with_context(|| format!("Failed caching npm package '{package}'.")) - } - - async fn ensure_package_inner( - &self, - package: &PackageNv, - dist: &NpmPackageVersionDistInfo, - registry_url: &Url, - ) -> Result<(), AnyError> { - let package_folder = self - .cache_dir - .package_folder_for_name_and_version(package, registry_url); - if self.should_use_global_cache_for_package(package) - && self.fs.exists_sync(&package_folder) - // if this file exists, then the package didn't successfully extract - // the first time, or another process is currently extracting the zip file - && !self.fs.exists_sync(&package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME)) - { - return Ok(()); - } else if self.cache_setting == CacheSetting::Only { - return Err(custom_error( - "NotCached", - format!( - "An npm specifier not found in cache: \"{}\", --cached-only is specified.", - &package.name - ) - ) - ); - } - - if dist.tarball.is_empty() { - bail!("Tarball URL was empty."); - } - - let guard = self.progress_bar.update(&dist.tarball); - let maybe_bytes = self - .http_client - .download_with_progress(&dist.tarball, &guard) - .await?; - match maybe_bytes { - Some(bytes) => { - verify_and_extract_tarball(package, &bytes, dist, &package_folder) - } - None => { - bail!("Could not find npm package tarball at: {}", dist.tarball); - } - } - } - - /// Ensures a copy of the package exists in the global cache. - /// - /// This assumes that the original package folder being hard linked - /// from exists before this is called. 
- pub fn ensure_copy_package( - &self, - folder_id: &NpmPackageCacheFolderId, - registry_url: &Url, - ) -> Result<(), AnyError> { - assert_ne!(folder_id.copy_index, 0); - let package_folder = self - .cache_dir - .package_folder_for_id(folder_id, registry_url); - - if package_folder.exists() - // if this file exists, then the package didn't successfully extract - // the first time, or another process is currently extracting the zip file - && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists() - && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name) - { - return Ok(()); - } - - let original_package_folder = self - .cache_dir - .package_folder_for_name_and_version(&folder_id.nv, registry_url); - with_folder_sync_lock(&folder_id.nv, &package_folder, || { - hard_link_dir_recursive(&original_package_folder, &package_folder) - })?; - Ok(()) - } - - pub fn package_folder_for_id( - &self, - id: &NpmPackageCacheFolderId, - registry_url: &Url, - ) -> PathBuf { - self.cache_dir.package_folder_for_id(id, registry_url) - } - - pub fn package_folder_for_name_and_version( - &self, - package: &PackageNv, - registry_url: &Url, - ) -> PathBuf { - self - .cache_dir - .package_folder_for_name_and_version(package, registry_url) - } - - pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf { - self.cache_dir.package_name_folder(name, registry_url) - } - - pub fn registry_folder(&self, registry_url: &Url) -> PathBuf { - self.cache_dir.registry_folder(registry_url) - } - - pub fn resolve_package_folder_id_from_specifier( - &self, - specifier: &ModuleSpecifier, - registry_url: &Url, - ) -> Option { - self - .cache_dir - .resolve_package_folder_id_from_specifier(specifier, registry_url) - } -} - -pub fn mixed_case_package_name_encode(name: &str) -> String { - // use base32 encoding because it's reversible and the character set - // only includes the characters within 0-9 and A-Z so it can be lower cased - base32::encode( - 
base32::Alphabet::RFC4648 { padding: false }, - name.as_bytes(), - ) - .to_lowercase() -} - -pub fn mixed_case_package_name_decode(name: &str) -> Option<String> { - base32::decode(base32::Alphabet::RFC4648 { padding: false }, name) - .and_then(|b| String::from_utf8(b).ok()) -} - -#[cfg(test)] -mod test { - use deno_core::url::Url; - use deno_semver::package::PackageNv; - use deno_semver::Version; - - use super::NpmCacheDir; - use crate::npm::cache::NpmPackageCacheFolderId; - - #[test] - fn should_get_package_folder() { - let deno_dir = crate::cache::DenoDir::new(None).unwrap(); - let root_dir = deno_dir.npm_folder_path(); - let cache = NpmCacheDir::new(root_dir.clone()); - let registry_url = Url::parse("https://registry.npmjs.org/").unwrap(); - - assert_eq!( - cache.package_folder_for_id( - &NpmPackageCacheFolderId { - nv: PackageNv { - name: "json".to_string(), - version: Version::parse_from_npm("1.2.5").unwrap(), - }, - copy_index: 0, - }, - &registry_url, - ), - root_dir - .join("registry.npmjs.org") - .join("json") - .join("1.2.5"), - ); - - assert_eq!( - cache.package_folder_for_id( - &NpmPackageCacheFolderId { - nv: PackageNv { - name: "json".to_string(), - version: Version::parse_from_npm("1.2.5").unwrap(), - }, - copy_index: 1, - }, - &registry_url, - ), - root_dir - .join("registry.npmjs.org") - .join("json") - .join("1.2.5_1"), - ); - - assert_eq!( - cache.package_folder_for_id( - &NpmPackageCacheFolderId { - nv: PackageNv { - name: "JSON".to_string(), - version: Version::parse_from_npm("2.1.5").unwrap(), - }, - copy_index: 0, - }, - &registry_url, - ), - root_dir - .join("registry.npmjs.org") - .join("_jjju6tq") - .join("2.1.5"), - ); - - assert_eq!( - cache.package_folder_for_id( - &NpmPackageCacheFolderId { - nv: PackageNv { - name: "@types/JSON".to_string(), - version: Version::parse_from_npm("2.1.5").unwrap(), - }, - copy_index: 0, - }, - &registry_url, - ), - root_dir - .join("registry.npmjs.org") - .join("_ib2hs4dfomxuuu2pjy") - .join("2.1.5"), - ); - } -} diff --git 
a/cli/npm/cache_dir.rs b/cli/npm/cache_dir.rs new file mode 100644 index 000000000..b0d049047 --- /dev/null +++ b/cli/npm/cache_dir.rs @@ -0,0 +1,268 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::path::Path; +use std::path::PathBuf; + +use deno_ast::ModuleSpecifier; +use deno_core::anyhow::Context; +use deno_core::error::AnyError; +use deno_core::url::Url; +use deno_npm::NpmPackageCacheFolderId; +use deno_semver::package::PackageNv; +use deno_semver::Version; + +use crate::util::fs::canonicalize_path; +use crate::util::path::root_url_to_safe_local_dirname; + +/// The global cache directory of npm packages. +#[derive(Clone, Debug)] +pub struct NpmCacheDir { + root_dir: PathBuf, + // cached url representation of the root directory + root_dir_url: Url, +} + +impl NpmCacheDir { + pub fn new(root_dir: PathBuf) -> Self { + fn try_get_canonicalized_root_dir( + root_dir: &Path, + ) -> Result { + if !root_dir.exists() { + std::fs::create_dir_all(root_dir) + .with_context(|| format!("Error creating {}", root_dir.display()))?; + } + Ok(canonicalize_path(root_dir)?) 
+ } + + // this may fail on readonly file systems, so just ignore if so + let root_dir = + try_get_canonicalized_root_dir(&root_dir).unwrap_or(root_dir); + let root_dir_url = Url::from_directory_path(&root_dir).unwrap(); + Self { + root_dir, + root_dir_url, + } + } + + pub fn root_dir_url(&self) -> &Url { + &self.root_dir_url + } + + pub fn package_folder_for_id( + &self, + folder_id: &NpmPackageCacheFolderId, + registry_url: &Url, + ) -> PathBuf { + if folder_id.copy_index == 0 { + self.package_folder_for_name_and_version(&folder_id.nv, registry_url) + } else { + self + .package_name_folder(&folder_id.nv.name, registry_url) + .join(format!("{}_{}", folder_id.nv.version, folder_id.copy_index)) + } + } + + pub fn package_folder_for_name_and_version( + &self, + package: &PackageNv, + registry_url: &Url, + ) -> PathBuf { + self + .package_name_folder(&package.name, registry_url) + .join(package.version.to_string()) + } + + pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf { + let mut dir = self.registry_folder(registry_url); + if name.to_lowercase() != name { + let encoded_name = mixed_case_package_name_encode(name); + // Using the encoded directory may have a collision with an actual package name + // so prefix it with an underscore since npm packages can't start with that + dir.join(format!("_{encoded_name}")) + } else { + // ensure backslashes are used on windows + for part in name.split('/') { + dir = dir.join(part); + } + dir + } + } + + pub fn registry_folder(&self, registry_url: &Url) -> PathBuf { + self + .root_dir + .join(root_url_to_safe_local_dirname(registry_url)) + } + + pub fn resolve_package_folder_id_from_specifier( + &self, + specifier: &ModuleSpecifier, + registry_url: &Url, + ) -> Option { + let registry_root_dir = self + .root_dir_url + .join(&format!( + "{}/", + root_url_to_safe_local_dirname(registry_url) + .to_string_lossy() + .replace('\\', "/") + )) + // this not succeeding indicates a fatal issue, so unwrap + 
.unwrap(); + let mut relative_url = registry_root_dir.make_relative(specifier)?; + if relative_url.starts_with("../") { + return None; + } + + // base32 decode the url if it starts with an underscore + // * Ex. _{base32(package_name)}/ + if let Some(end_url) = relative_url.strip_prefix('_') { + let mut parts = end_url + .split('/') + .map(ToOwned::to_owned) + .collect::>(); + match mixed_case_package_name_decode(&parts[0]) { + Some(part) => { + parts[0] = part; + } + None => return None, + } + relative_url = parts.join("/"); + } + + // examples: + // * chalk/5.0.1/ + // * @types/chalk/5.0.1/ + // * some-package/5.0.1_1/ -- where the `_1` (/_\d+/) is a copy of the folder for peer deps + let is_scoped_package = relative_url.starts_with('@'); + let mut parts = relative_url + .split('/') + .enumerate() + .take(if is_scoped_package { 3 } else { 2 }) + .map(|(_, part)| part) + .collect::>(); + if parts.len() < 2 { + return None; + } + let version_part = parts.pop().unwrap(); + let name = parts.join("/"); + let (version, copy_index) = + if let Some((version, copy_count)) = version_part.split_once('_') { + (version, copy_count.parse::().ok()?) 
+ } else { + (version_part, 0) + }; + Some(NpmPackageCacheFolderId { + nv: PackageNv { + name, + version: Version::parse_from_npm(version).ok()?, + }, + copy_index, + }) + } + + pub fn get_cache_location(&self) -> PathBuf { + self.root_dir.clone() + } +} + +pub fn mixed_case_package_name_encode(name: &str) -> String { + // use base32 encoding because it's reversible and the character set + // only includes the characters within 0-9 and A-Z so it can be lower cased + base32::encode( + base32::Alphabet::RFC4648 { padding: false }, + name.as_bytes(), + ) + .to_lowercase() +} + +pub fn mixed_case_package_name_decode(name: &str) -> Option<String> { + base32::decode(base32::Alphabet::RFC4648 { padding: false }, name) + .and_then(|b| String::from_utf8(b).ok()) +} + +#[cfg(test)] +mod test { + use deno_core::url::Url; + use deno_semver::package::PackageNv; + use deno_semver::Version; + + use super::NpmCacheDir; + use crate::npm::cache_dir::NpmPackageCacheFolderId; + + #[test] + fn should_get_package_folder() { + let deno_dir = crate::cache::DenoDir::new(None).unwrap(); + let root_dir = deno_dir.npm_folder_path(); + let cache = NpmCacheDir::new(root_dir.clone()); + let registry_url = Url::parse("https://registry.npmjs.org/").unwrap(); + + assert_eq!( + cache.package_folder_for_id( + &NpmPackageCacheFolderId { + nv: PackageNv { + name: "json".to_string(), + version: Version::parse_from_npm("1.2.5").unwrap(), + }, + copy_index: 0, + }, + &registry_url, + ), + root_dir + .join("registry.npmjs.org") + .join("json") + .join("1.2.5"), + ); + + assert_eq!( + cache.package_folder_for_id( + &NpmPackageCacheFolderId { + nv: PackageNv { + name: "json".to_string(), + version: Version::parse_from_npm("1.2.5").unwrap(), + }, + copy_index: 1, + }, + &registry_url, + ), + root_dir + .join("registry.npmjs.org") + .join("json") + .join("1.2.5_1"), + ); + + assert_eq!( + cache.package_folder_for_id( + &NpmPackageCacheFolderId { + nv: PackageNv { + name: "JSON".to_string(), + version: 
Version::parse_from_npm("2.1.5").unwrap(), + }, + copy_index: 0, + }, + &registry_url, + ), + root_dir + .join("registry.npmjs.org") + .join("_jjju6tq") + .join("2.1.5"), + ); + + assert_eq!( + cache.package_folder_for_id( + &NpmPackageCacheFolderId { + nv: PackageNv { + name: "@types/JSON".to_string(), + version: Version::parse_from_npm("2.1.5").unwrap(), + }, + copy_index: 0, + }, + &registry_url, + ), + root_dir + .join("registry.npmjs.org") + .join("_ib2hs4dfomxuuu2pjy") + .join("2.1.5"), + ); + } +} diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs new file mode 100644 index 000000000..91d6ec656 --- /dev/null +++ b/cli/npm/managed/cache.rs @@ -0,0 +1,278 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashSet; +use std::fs; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; + +use deno_ast::ModuleSpecifier; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::parking_lot::Mutex; +use deno_core::url::Url; +use deno_npm::registry::NpmPackageVersionDistInfo; +use deno_npm::NpmPackageCacheFolderId; +use deno_runtime::deno_fs; +use deno_semver::package::PackageNv; + +use crate::args::CacheSetting; +use crate::http_util::HttpClient; +use crate::npm::NpmCacheDir; +use crate::util::fs::hard_link_dir_recursive; +use crate::util::progress_bar::ProgressBar; + +use super::tarball::verify_and_extract_tarball; + +/// Stores a single copy of npm packages in a cache. 
+#[derive(Debug)] +pub struct NpmCache { + cache_dir: NpmCacheDir, + cache_setting: CacheSetting, + fs: Arc, + http_client: Arc, + progress_bar: ProgressBar, + /// ensures a package is only downloaded once per run + previously_reloaded_packages: Mutex>, +} + +impl NpmCache { + pub fn new( + cache_dir: NpmCacheDir, + cache_setting: CacheSetting, + fs: Arc, + http_client: Arc, + progress_bar: ProgressBar, + ) -> Self { + Self { + cache_dir, + cache_setting, + fs, + http_client, + progress_bar, + previously_reloaded_packages: Default::default(), + } + } + + pub fn cache_setting(&self) -> &CacheSetting { + &self.cache_setting + } + + pub fn root_dir_url(&self) -> &Url { + self.cache_dir.root_dir_url() + } + + /// Checks if the cache should be used for the provided name and version. + /// NOTE: Subsequent calls for the same package will always return `true` + /// to ensure a package is only downloaded once per run of the CLI. This + /// prevents downloads from re-occurring when someone has `--reload` and + /// and imports a dynamic import that imports the same package again for example. 
+ fn should_use_global_cache_for_package(&self, package: &PackageNv) -> bool { + self.cache_setting.should_use_for_npm_package(&package.name) + || !self + .previously_reloaded_packages + .lock() + .insert(package.clone()) + } + + pub async fn ensure_package( + &self, + package: &PackageNv, + dist: &NpmPackageVersionDistInfo, + registry_url: &Url, + ) -> Result<(), AnyError> { + self + .ensure_package_inner(package, dist, registry_url) + .await + .with_context(|| format!("Failed caching npm package '{package}'.")) + } + + async fn ensure_package_inner( + &self, + package: &PackageNv, + dist: &NpmPackageVersionDistInfo, + registry_url: &Url, + ) -> Result<(), AnyError> { + let package_folder = self + .cache_dir + .package_folder_for_name_and_version(package, registry_url); + if self.should_use_global_cache_for_package(package) + && self.fs.exists_sync(&package_folder) + // if this file exists, then the package didn't successfully extract + // the first time, or another process is currently extracting the zip file + && !self.fs.exists_sync(&package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME)) + { + return Ok(()); + } else if self.cache_setting == CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{}\", --cached-only is specified.", + &package.name + ) + ) + ); + } + + if dist.tarball.is_empty() { + bail!("Tarball URL was empty."); + } + + let guard = self.progress_bar.update(&dist.tarball); + let maybe_bytes = self + .http_client + .download_with_progress(&dist.tarball, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + verify_and_extract_tarball(package, &bytes, dist, &package_folder) + } + None => { + bail!("Could not find npm package tarball at: {}", dist.tarball); + } + } + } + + /// Ensures a copy of the package exists in the global cache. + /// + /// This assumes that the original package folder being hard linked + /// from exists before this is called. 
+ pub fn ensure_copy_package( + &self, + folder_id: &NpmPackageCacheFolderId, + registry_url: &Url, + ) -> Result<(), AnyError> { + assert_ne!(folder_id.copy_index, 0); + let package_folder = self + .cache_dir + .package_folder_for_id(folder_id, registry_url); + + if package_folder.exists() + // if this file exists, then the package didn't successfully extract + // the first time, or another process is currently extracting the zip file + && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists() + && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name) + { + return Ok(()); + } + + let original_package_folder = self + .cache_dir + .package_folder_for_name_and_version(&folder_id.nv, registry_url); + with_folder_sync_lock(&folder_id.nv, &package_folder, || { + hard_link_dir_recursive(&original_package_folder, &package_folder) + })?; + Ok(()) + } + + pub fn package_folder_for_id( + &self, + id: &NpmPackageCacheFolderId, + registry_url: &Url, + ) -> PathBuf { + self.cache_dir.package_folder_for_id(id, registry_url) + } + + pub fn package_folder_for_name_and_version( + &self, + package: &PackageNv, + registry_url: &Url, + ) -> PathBuf { + self + .cache_dir + .package_folder_for_name_and_version(package, registry_url) + } + + pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf { + self.cache_dir.package_name_folder(name, registry_url) + } + + pub fn registry_folder(&self, registry_url: &Url) -> PathBuf { + self.cache_dir.registry_folder(registry_url) + } + + pub fn resolve_package_folder_id_from_specifier( + &self, + specifier: &ModuleSpecifier, + registry_url: &Url, + ) -> Option { + self + .cache_dir + .resolve_package_folder_id_from_specifier(specifier, registry_url) + } +} + +const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock"; + +pub fn with_folder_sync_lock( + package: &PackageNv, + output_folder: &Path, + action: impl FnOnce() -> Result<(), AnyError>, +) -> Result<(), AnyError> { + fn inner( + output_folder: 
&Path, + action: impl FnOnce() -> Result<(), AnyError>, + ) -> Result<(), AnyError> { + fs::create_dir_all(output_folder).with_context(|| { + format!("Error creating '{}'.", output_folder.display()) + })?; + + // This sync lock file is a way to ensure that partially created + // npm package directories aren't considered valid. This could maybe + // be a bit smarter in the future to not bother extracting here + // if another process has taken the lock in the past X seconds and + // wait for the other process to finish (it could try to create the + // file with `create_new(true)` then if it exists, check the metadata + // then wait until the other process finishes with a timeout), but + // for now this is good enough. + let sync_lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME); + match fs::OpenOptions::new() + .write(true) + .create(true) + .open(&sync_lock_path) + { + Ok(_) => { + action()?; + // extraction succeeded, so only now delete this file + let _ignore = std::fs::remove_file(&sync_lock_path); + Ok(()) + } + Err(err) => { + bail!( + concat!( + "Error creating package sync lock file at '{}'. 
", + "Maybe try manually deleting this folder.\n\n{:#}", + ), + output_folder.display(), + err + ); + } + } + } + + match inner(output_folder, action) { + Ok(()) => Ok(()), + Err(err) => { + if let Err(remove_err) = fs::remove_dir_all(output_folder) { + if remove_err.kind() != std::io::ErrorKind::NotFound { + bail!( + concat!( + "Failed setting up package cache directory for {}, then ", + "failed cleaning it up.\n\nOriginal error:\n\n{}\n\n", + "Remove error:\n\n{}\n\nPlease manually ", + "delete this folder or you will run into issues using this ", + "package in the future:\n\n{}" + ), + package, + err, + remove_err, + output_folder.display(), + ); + } + } + Err(err) + } + } +} diff --git a/cli/npm/managed/installer.rs b/cli/npm/managed/installer.rs index 21285c3d7..8f3db0531 100644 --- a/cli/npm/managed/installer.rs +++ b/cli/npm/managed/installer.rs @@ -13,7 +13,7 @@ use deno_semver::package::PackageReq; use crate::args::PackageJsonDepsProvider; use crate::util::sync::AtomicFlag; -use super::super::CliNpmRegistryApi; +use super::CliNpmRegistryApi; use super::NpmResolution; #[derive(Debug)] diff --git a/cli/npm/managed/mod.rs b/cli/npm/managed/mod.rs index c5ba3d3af..df9ad59ac 100644 --- a/cli/npm/managed/mod.rs +++ b/cli/npm/managed/mod.rs @@ -6,6 +6,7 @@ use std::path::PathBuf; use std::sync::Arc; use deno_ast::ModuleSpecifier; +use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_core::parking_lot::Mutex; use deno_core::serde_json; @@ -14,7 +15,7 @@ use deno_graph::NpmPackageReqResolution; use deno_npm::registry::NpmRegistryApi; use deno_npm::resolution::NpmResolutionSnapshot; use deno_npm::resolution::PackageReqNotFoundError; -use deno_npm::resolution::SerializedNpmResolutionSnapshot; +use deno_npm::resolution::ValidSerializedNpmResolutionSnapshot; use deno_npm::NpmPackageId; use deno_npm::NpmResolutionPackage; use deno_npm::NpmSystemInfo; @@ -27,30 +28,213 @@ use deno_semver::npm::NpmPackageReqReference; use 
deno_semver::package::PackageNv; use deno_semver::package::PackageNvReference; use deno_semver::package::PackageReq; -use serde::Deserialize; -use serde::Serialize; use crate::args::Lockfile; +use crate::args::NpmProcessState; +use crate::args::PackageJsonDepsProvider; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; +use crate::util::progress_bar::ProgressBar; + +use self::cache::NpmCache; +use self::installer::PackageJsonDepsInstaller; +use self::registry::CliNpmRegistryApi; +use self::resolution::NpmResolution; +use self::resolvers::create_npm_fs_resolver; +use self::resolvers::NpmPackageFsResolver; -use super::CliNpmRegistryApi; use super::CliNpmResolver; use super::InnerCliNpmResolverRef; +use super::NpmCacheDir; -pub use self::installer::PackageJsonDepsInstaller; -pub use self::resolution::NpmResolution; -pub use self::resolvers::create_npm_fs_resolver; -pub use self::resolvers::NpmPackageFsResolver; - +mod cache; mod installer; +mod registry; mod resolution; mod resolvers; +mod tarball; + +pub enum CliNpmResolverManagedSnapshotOption { + ResolveFromLockfile(Arc>), + Specified(Option), +} + +pub enum CliNpmResolverManagedPackageJsonInstallerOption { + ConditionalInstall(Arc), + NoInstall, +} + +pub struct CliNpmResolverManagedCreateOptions { + pub snapshot: CliNpmResolverManagedSnapshotOption, + pub maybe_lockfile: Option>>, + pub fs: Arc, + pub http_client: Arc, + pub npm_global_cache_dir: PathBuf, + pub cache_setting: crate::args::CacheSetting, + pub text_only_progress_bar: crate::util::progress_bar::ProgressBar, + pub maybe_node_modules_path: Option, + pub npm_system_info: NpmSystemInfo, + pub package_json_installer: CliNpmResolverManagedPackageJsonInstallerOption, + pub npm_registry_url: Url, +} + +pub async fn create_managed_npm_resolver_for_lsp( + options: CliNpmResolverManagedCreateOptions, +) -> Arc { + let npm_cache = create_cache(&options); + let npm_api = create_api(&options, npm_cache.clone()); + let snapshot = match 
resolve_snapshot(&npm_api, options.snapshot).await { + Ok(snapshot) => snapshot, + Err(err) => { + log::warn!("failed to resolve snapshot: {}", err); + None + } + }; + create_inner( + npm_cache, + npm_api, + snapshot, + options.maybe_lockfile, + options.fs, + options.text_only_progress_bar, + options.maybe_node_modules_path, + options.package_json_installer, + options.npm_registry_url, + options.npm_system_info, + ) +} -/// State provided to the process via an environment variable. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct NpmProcessState { - pub snapshot: SerializedNpmResolutionSnapshot, - pub local_node_modules_path: Option, +pub async fn create_managed_npm_resolver( + options: CliNpmResolverManagedCreateOptions, +) -> Result, AnyError> { + let npm_cache = create_cache(&options); + let npm_api = create_api(&options, npm_cache.clone()); + let snapshot = resolve_snapshot(&npm_api, options.snapshot).await?; + Ok(create_inner( + npm_cache, + npm_api, + snapshot, + options.maybe_lockfile, + options.fs, + options.text_only_progress_bar, + options.maybe_node_modules_path, + options.package_json_installer, + options.npm_registry_url, + options.npm_system_info, + )) +} + +#[allow(clippy::too_many_arguments)] +fn create_inner( + npm_cache: Arc, + npm_api: Arc, + snapshot: Option, + maybe_lockfile: Option>>, + fs: Arc, + text_only_progress_bar: crate::util::progress_bar::ProgressBar, + node_modules_dir_path: Option, + package_json_installer: CliNpmResolverManagedPackageJsonInstallerOption, + npm_registry_url: Url, + npm_system_info: NpmSystemInfo, +) -> Arc { + let resolution = Arc::new(NpmResolution::from_serialized( + npm_api.clone(), + snapshot, + maybe_lockfile.clone(), + )); + let npm_fs_resolver = create_npm_fs_resolver( + fs.clone(), + npm_cache.clone(), + &text_only_progress_bar, + npm_registry_url, + resolution.clone(), + node_modules_dir_path, + npm_system_info.clone(), + ); + let package_json_deps_installer = match package_json_installer { + 
CliNpmResolverManagedPackageJsonInstallerOption::ConditionalInstall( + provider, + ) => Arc::new(PackageJsonDepsInstaller::new( + provider, + npm_api.clone(), + resolution.clone(), + )), + CliNpmResolverManagedPackageJsonInstallerOption::NoInstall => { + Arc::new(PackageJsonDepsInstaller::no_op()) + } + }; + Arc::new(ManagedCliNpmResolver::new( + npm_api, + fs, + resolution, + npm_fs_resolver, + npm_cache, + maybe_lockfile, + package_json_deps_installer, + text_only_progress_bar, + npm_system_info, + )) +} + +fn create_cache(options: &CliNpmResolverManagedCreateOptions) -> Arc { + Arc::new(NpmCache::new( + NpmCacheDir::new(options.npm_global_cache_dir.clone()), + options.cache_setting.clone(), + options.fs.clone(), + options.http_client.clone(), + options.text_only_progress_bar.clone(), + )) +} + +fn create_api( + options: &CliNpmResolverManagedCreateOptions, + npm_cache: Arc, +) -> Arc { + Arc::new(CliNpmRegistryApi::new( + options.npm_registry_url.clone(), + npm_cache.clone(), + options.http_client.clone(), + options.text_only_progress_bar.clone(), + )) +} + +async fn resolve_snapshot( + api: &CliNpmRegistryApi, + snapshot: CliNpmResolverManagedSnapshotOption, +) -> Result, AnyError> { + match snapshot { + CliNpmResolverManagedSnapshotOption::ResolveFromLockfile(lockfile) => { + if !lockfile.lock().overwrite { + let snapshot = snapshot_from_lockfile(lockfile.clone(), api) + .await + .with_context(|| { + format!( + "failed reading lockfile '{}'", + lockfile.lock().filename.display() + ) + })?; + // clear the memory cache to reduce memory usage + api.clear_memory_cache(); + Ok(Some(snapshot)) + } else { + Ok(None) + } + } + CliNpmResolverManagedSnapshotOption::Specified(snapshot) => Ok(snapshot), + } +} + +async fn snapshot_from_lockfile( + lockfile: Arc>, + api: &dyn NpmRegistryApi, +) -> Result { + let incomplete_snapshot = { + let lock = lockfile.lock(); + deno_npm::resolution::incomplete_snapshot_from_lockfile(&lock)? 
+ }; + let snapshot = + deno_npm::resolution::snapshot_from_lockfile(incomplete_snapshot, api) + .await?; + Ok(snapshot) } /// An npm resolver where the resolution is managed by Deno rather than @@ -59,40 +243,45 @@ pub struct ManagedCliNpmResolver { api: Arc, fs: Arc, fs_resolver: Arc, + global_npm_cache: Arc, resolution: Arc, maybe_lockfile: Option>>, + npm_system_info: NpmSystemInfo, + progress_bar: ProgressBar, package_json_deps_installer: Arc, } impl std::fmt::Debug for ManagedCliNpmResolver { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ManagedNpmResolver") - .field("api", &"") - .field("fs", &"") - .field("fs_resolver", &"") - .field("resolution", &"") - .field("maybe_lockfile", &"") - .field("package_json_deps_installer", &"") + .field("", &"") .finish() } } impl ManagedCliNpmResolver { + #[allow(clippy::too_many_arguments)] pub fn new( api: Arc, fs: Arc, resolution: Arc, fs_resolver: Arc, + global_npm_cache: Arc, maybe_lockfile: Option>>, package_json_deps_installer: Arc, + progress_bar: ProgressBar, + npm_system_info: NpmSystemInfo, ) -> Self { Self { api, fs, fs_resolver, + global_npm_cache, resolution, maybe_lockfile, package_json_deps_installer, + progress_bar, + npm_system_info, } } @@ -191,6 +380,15 @@ impl ManagedCliNpmResolver { self.resolution.snapshot() } + pub fn serialized_valid_snapshot_for_system( + &self, + system_info: &NpmSystemInfo, + ) -> ValidSerializedNpmResolutionSnapshot { + self + .resolution + .serialized_valid_snapshot_for_system(system_info) + } + pub fn lock(&self, lockfile: &mut Lockfile) -> Result<(), AnyError> { self.resolution.lock(lockfile) } @@ -208,8 +406,11 @@ impl ManagedCliNpmResolver { pub async fn resolve_pending(&self) -> Result<(), AnyError> { self.resolution.resolve_pending().await?; - self.fs_resolver.cache_packages().await?; - Ok(()) + self.cache_packages().await + } + + pub async fn cache_packages(&self) -> Result<(), AnyError> { + self.fs_resolver.cache_packages().await 
} fn resolve_pkg_id_from_pkg_req( @@ -240,6 +441,17 @@ impl ManagedCliNpmResolver { .map(|_| ()) .map_err(|err| err.into()) } + + pub fn registry_base_url(&self) -> &ModuleSpecifier { + self.api.base_url() + } + + pub fn registry_folder_in_global_cache( + &self, + registry_url: &ModuleSpecifier, + ) -> PathBuf { + self.global_npm_cache.registry_folder(registry_url) + } } impl NpmResolver for ManagedCliNpmResolver { @@ -283,6 +495,35 @@ impl CliNpmResolver for ManagedCliNpmResolver { self } + fn clone_snapshotted(&self) -> Arc { + // create a new snapshotted npm resolution and resolver + let npm_resolution = Arc::new(NpmResolution::new( + self.api.clone(), + self.resolution.snapshot(), + self.maybe_lockfile.clone(), + )); + + Arc::new(ManagedCliNpmResolver::new( + self.api.clone(), + self.fs.clone(), + npm_resolution.clone(), + create_npm_fs_resolver( + self.fs.clone(), + self.global_npm_cache.clone(), + &self.progress_bar, + self.api.base_url().clone(), + npm_resolution, + self.node_modules_path(), + self.npm_system_info.clone(), + ), + self.global_npm_cache.clone(), + self.maybe_lockfile.clone(), + self.package_json_deps_installer.clone(), + self.progress_bar.clone(), + self.npm_system_info.clone(), + )) + } + fn root_dir_url(&self) -> &Url { self.fs_resolver.root_dir_url() } diff --git a/cli/npm/managed/registry.rs b/cli/npm/managed/registry.rs new file mode 100644 index 000000000..2466f4713 --- /dev/null +++ b/cli/npm/managed/registry.rs @@ -0,0 +1,358 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. 
+ +use std::collections::HashMap; +use std::collections::HashSet; +use std::fs; +use std::io::ErrorKind; +use std::path::PathBuf; +use std::sync::Arc; + +use async_trait::async_trait; +use deno_core::anyhow::anyhow; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; +use deno_core::parking_lot::Mutex; +use deno_core::serde_json; +use deno_core::url::Url; +use deno_npm::registry::NpmPackageInfo; +use deno_npm::registry::NpmRegistryApi; +use deno_npm::registry::NpmRegistryPackageInfoLoadError; + +use crate::args::CacheSetting; +use crate::cache::CACHE_PERM; +use crate::http_util::HttpClient; +use crate::util::fs::atomic_write_file; +use crate::util::progress_bar::ProgressBar; +use crate::util::sync::AtomicFlag; + +use super::cache::NpmCache; + +#[derive(Debug)] +pub struct CliNpmRegistryApi(Option>); + +impl CliNpmRegistryApi { + pub fn new( + base_url: Url, + cache: Arc, + http_client: Arc, + progress_bar: ProgressBar, + ) -> Self { + Self(Some(Arc::new(CliNpmRegistryApiInner { + base_url, + cache, + force_reload_flag: Default::default(), + mem_cache: Default::default(), + previously_reloaded_packages: Default::default(), + http_client, + progress_bar, + }))) + } + + /// Clears the internal memory cache. 
+ pub fn clear_memory_cache(&self) { + self.inner().clear_memory_cache(); + } + + pub fn get_cached_package_info( + &self, + name: &str, + ) -> Option> { + self.inner().get_cached_package_info(name) + } + + pub fn base_url(&self) -> &Url { + &self.inner().base_url + } + + fn inner(&self) -> &Arc { + // this panicking indicates a bug in the code where this + // wasn't initialized + self.0.as_ref().unwrap() + } +} + +#[async_trait] +impl NpmRegistryApi for CliNpmRegistryApi { + async fn package_info( + &self, + name: &str, + ) -> Result, NpmRegistryPackageInfoLoadError> { + match self.inner().maybe_package_info(name).await { + Ok(Some(info)) => Ok(info), + Ok(None) => Err(NpmRegistryPackageInfoLoadError::PackageNotExists { + package_name: name.to_string(), + }), + Err(err) => { + Err(NpmRegistryPackageInfoLoadError::LoadError(Arc::new(err))) + } + } + } + + fn mark_force_reload(&self) -> bool { + // never force reload the registry information if reloading + // is disabled or if we're already reloading + if matches!( + self.inner().cache.cache_setting(), + CacheSetting::Only | CacheSetting::ReloadAll + ) { + return false; + } + if self.inner().force_reload_flag.raise() { + self.clear_memory_cache(); // clear the cache to force reloading + true + } else { + false + } + } +} + +type CacheItemPendingResult = + Result>, Arc>; + +#[derive(Debug)] +enum CacheItem { + Pending(Shared>), + Resolved(Option>), +} + +#[derive(Debug)] +struct CliNpmRegistryApiInner { + base_url: Url, + cache: Arc, + force_reload_flag: AtomicFlag, + mem_cache: Mutex>, + previously_reloaded_packages: Mutex>, + http_client: Arc, + progress_bar: ProgressBar, +} + +impl CliNpmRegistryApiInner { + pub async fn maybe_package_info( + self: &Arc, + name: &str, + ) -> Result>, AnyError> { + let (created, future) = { + let mut mem_cache = self.mem_cache.lock(); + match mem_cache.get(name) { + Some(CacheItem::Resolved(maybe_info)) => { + return Ok(maybe_info.clone()); + } + Some(CacheItem::Pending(future)) => 
(false, future.clone()), + None => { + if (self.cache.cache_setting().should_use_for_npm_package(name) && !self.force_reload()) + // if this has been previously reloaded, then try loading from the + // file system cache + || !self.previously_reloaded_packages.lock().insert(name.to_string()) + { + // attempt to load from the file cache + if let Some(info) = self.load_file_cached_package_info(name) { + let result = Some(Arc::new(info)); + mem_cache + .insert(name.to_string(), CacheItem::Resolved(result.clone())); + return Ok(result); + } + } + + let future = { + let api = self.clone(); + let name = name.to_string(); + async move { + api + .load_package_info_from_registry(&name) + .await + .map(|info| info.map(Arc::new)) + .map_err(Arc::new) + } + .boxed() + .shared() + }; + mem_cache + .insert(name.to_string(), CacheItem::Pending(future.clone())); + (true, future) + } + } + }; + + if created { + match future.await { + Ok(maybe_info) => { + // replace the cache item to say it's resolved now + self + .mem_cache + .lock() + .insert(name.to_string(), CacheItem::Resolved(maybe_info.clone())); + Ok(maybe_info) + } + Err(err) => { + // purge the item from the cache so it loads next time + self.mem_cache.lock().remove(name); + Err(anyhow!("{:#}", err)) + } + } + } else { + Ok(future.await.map_err(|err| anyhow!("{:#}", err))?) 
+ } + } + + fn force_reload(&self) -> bool { + self.force_reload_flag.is_raised() + } + + fn load_file_cached_package_info( + &self, + name: &str, + ) -> Option { + match self.load_file_cached_package_info_result(name) { + Ok(value) => value, + Err(err) => { + if cfg!(debug_assertions) { + panic!("error loading cached npm package info for {name}: {err:#}"); + } else { + None + } + } + } + } + + fn load_file_cached_package_info_result( + &self, + name: &str, + ) -> Result, AnyError> { + let file_cache_path = self.get_package_file_cache_path(name); + let file_text = match fs::read_to_string(file_cache_path) { + Ok(file_text) => file_text, + Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err.into()), + }; + match serde_json::from_str(&file_text) { + Ok(package_info) => Ok(Some(package_info)), + Err(err) => { + // This scenario might mean we need to load more data from the + // npm registry than before. So, just debug log while in debug + // rather than panic. + log::debug!( + "error deserializing registry.json for '{}'. Reloading. 
{:?}", + name, + err + ); + Ok(None) + } + } + } + + fn save_package_info_to_file_cache( + &self, + name: &str, + package_info: &NpmPackageInfo, + ) { + if let Err(err) = + self.save_package_info_to_file_cache_result(name, package_info) + { + if cfg!(debug_assertions) { + panic!("error saving cached npm package info for {name}: {err:#}"); + } + } + } + + fn save_package_info_to_file_cache_result( + &self, + name: &str, + package_info: &NpmPackageInfo, + ) -> Result<(), AnyError> { + let file_cache_path = self.get_package_file_cache_path(name); + let file_text = serde_json::to_string(&package_info)?; + atomic_write_file(&file_cache_path, file_text, CACHE_PERM)?; + Ok(()) + } + + async fn load_package_info_from_registry( + &self, + name: &str, + ) -> Result, AnyError> { + self + .load_package_info_from_registry_inner(name) + .await + .with_context(|| { + format!( + "Error getting response at {} for package \"{}\"", + self.get_package_url(name), + name + ) + }) + } + + async fn load_package_info_from_registry_inner( + &self, + name: &str, + ) -> Result, AnyError> { + if *self.cache.cache_setting() == CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{name}\", --cached-only is specified." 
+ ) + )); + } + + let package_url = self.get_package_url(name); + let guard = self.progress_bar.update(package_url.as_str()); + + let maybe_bytes = self + .http_client + .download_with_progress(package_url, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + let package_info = serde_json::from_slice(&bytes)?; + self.save_package_info_to_file_cache(name, &package_info); + Ok(Some(package_info)) + } + None => Ok(None), + } + } + + fn get_package_url(&self, name: &str) -> Url { + // list of all characters used in npm packages: + // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ + const ASCII_SET: percent_encoding::AsciiSet = + percent_encoding::NON_ALPHANUMERIC + .remove(b'!') + .remove(b'\'') + .remove(b'(') + .remove(b')') + .remove(b'*') + .remove(b'-') + .remove(b'.') + .remove(b'/') + .remove(b'@') + .remove(b'_') + .remove(b'~'); + let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); + self.base_url.join(&name.to_string()).unwrap() + } + + fn get_package_file_cache_path(&self, name: &str) -> PathBuf { + let name_folder_path = self.cache.package_name_folder(name, &self.base_url); + name_folder_path.join("registry.json") + } + + fn clear_memory_cache(&self) { + self.mem_cache.lock().clear(); + } + + pub fn get_cached_package_info( + &self, + name: &str, + ) -> Option> { + let mem_cache = self.mem_cache.lock(); + if let Some(CacheItem::Resolved(maybe_info)) = mem_cache.get(name) { + maybe_info.clone() + } else { + None + } + } +} diff --git a/cli/npm/managed/resolution.rs b/cli/npm/managed/resolution.rs index 05c1227a7..f05275f3c 100644 --- a/cli/npm/managed/resolution.rs +++ b/cli/npm/managed/resolution.rs @@ -34,7 +34,7 @@ use deno_semver::VersionReq; use crate::args::Lockfile; use crate::util::sync::TaskQueue; -use super::super::registry::CliNpmRegistryApi; +use super::CliNpmRegistryApi; /// Handles updating and storing npm resolution in memory where the underlying /// snapshot can be updated concurrently. 
Additionally handles updating the lockfile @@ -221,8 +221,6 @@ impl NpmResolution { .map(|pkg| pkg.id.clone()) } - // todo: NEXT - /// Resolves a package requirement for deno graph. This should only be /// called by deno_graph's NpmResolver or for resolving packages in /// a package.json @@ -275,14 +273,6 @@ impl NpmResolution { .all_system_packages_partitioned(system_info) } - // todo: NEXT - - pub fn has_packages(&self) -> bool { - !self.snapshot.read().is_empty() - } - - // todo: NEXT - pub fn snapshot(&self) -> NpmResolutionSnapshot { self.snapshot.read().clone() } @@ -293,8 +283,6 @@ impl NpmResolution { self.snapshot.read().as_valid_serialized() } - // todo: NEXT - pub fn serialized_valid_snapshot_for_system( &self, system_info: &NpmSystemInfo, diff --git a/cli/npm/managed/resolvers/common.rs b/cli/npm/managed/resolvers/common.rs index 4076579bf..b0f375779 100644 --- a/cli/npm/managed/resolvers/common.rs +++ b/cli/npm/managed/resolvers/common.rs @@ -20,7 +20,7 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; -use crate::npm::NpmCache; +use super::super::cache::NpmCache; /// Part of the resolution that interacts with the file system. 
#[async_trait] diff --git a/cli/npm/managed/resolvers/global.rs b/cli/npm/managed/resolvers/global.rs index 25db62f73..3f042a38b 100644 --- a/cli/npm/managed/resolvers/global.rs +++ b/cli/npm/managed/resolvers/global.rs @@ -20,8 +20,7 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; -use crate::npm::NpmCache; - +use super::super::cache::NpmCache; use super::super::resolution::NpmResolution; use super::common::cache_packages; use super::common::types_package_name; diff --git a/cli/npm/managed/resolvers/local.rs b/cli/npm/managed/resolvers/local.rs index 57170eccd..8e4d72f26 100644 --- a/cli/npm/managed/resolvers/local.rs +++ b/cli/npm/managed/resolvers/local.rs @@ -12,7 +12,7 @@ use std::path::PathBuf; use std::sync::Arc; use crate::cache::CACHE_PERM; -use crate::npm::cache::mixed_case_package_name_decode; +use crate::npm::cache_dir::mixed_case_package_name_decode; use crate::util::fs::atomic_write_file; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; use crate::util::fs::symlink_dir; @@ -41,11 +41,11 @@ use deno_semver::package::PackageNv; use serde::Deserialize; use serde::Serialize; -use crate::npm::cache::mixed_case_package_name_encode; -use crate::npm::NpmCache; +use crate::npm::cache_dir::mixed_case_package_name_encode; use crate::util::fs::copy_dir_recursive; use crate::util::fs::hard_link_dir_recursive; +use super::super::cache::NpmCache; use super::super::resolution::NpmResolution; use super::common::types_package_name; use super::common::NpmPackageFsResolver; diff --git a/cli/npm/managed/resolvers/mod.rs b/cli/npm/managed/resolvers/mod.rs index b6d96c4af..5fc140f26 100644 --- a/cli/npm/managed/resolvers/mod.rs +++ b/cli/npm/managed/resolvers/mod.rs @@ -11,14 +11,15 @@ use deno_core::url::Url; use deno_npm::NpmSystemInfo; use deno_runtime::deno_fs::FileSystem; -use crate::npm::NpmCache; use crate::util::progress_bar::ProgressBar; pub use 
self::common::NpmPackageFsResolver; + use self::global::GlobalNpmPackageResolver; use self::local::LocalNpmPackageResolver; -use super::NpmResolution; +use super::cache::NpmCache; +use super::resolution::NpmResolution; pub fn create_npm_fs_resolver( fs: Arc, diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs new file mode 100644 index 000000000..e72b1afc8 --- /dev/null +++ b/cli/npm/managed/tarball.rs @@ -0,0 +1,241 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashSet; +use std::fs; +use std::path::Path; +use std::path::PathBuf; + +use deno_core::anyhow::bail; +use deno_core::error::AnyError; +use deno_npm::registry::NpmPackageVersionDistInfo; +use deno_npm::registry::NpmPackageVersionDistInfoIntegrity; +use deno_semver::package::PackageNv; +use flate2::read::GzDecoder; +use tar::Archive; +use tar::EntryType; + +use super::cache::with_folder_sync_lock; + +pub fn verify_and_extract_tarball( + package: &PackageNv, + data: &[u8], + dist_info: &NpmPackageVersionDistInfo, + output_folder: &Path, +) -> Result<(), AnyError> { + verify_tarball_integrity(package, data, &dist_info.integrity())?; + + with_folder_sync_lock(package, output_folder, || { + extract_tarball(data, output_folder) + }) +} + +fn verify_tarball_integrity( + package: &PackageNv, + data: &[u8], + npm_integrity: &NpmPackageVersionDistInfoIntegrity, +) -> Result<(), AnyError> { + use ring::digest::Context; + let (tarball_checksum, expected_checksum) = match npm_integrity { + NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm, + base64_hash, + } => { + let algo = match *algorithm { + "sha512" => &ring::digest::SHA512, + "sha1" => &ring::digest::SHA1_FOR_LEGACY_USE_ONLY, + hash_kind => bail!( + "Not implemented hash function for {}: {}", + package, + hash_kind + ), + }; + let mut hash_ctx = Context::new(algo); + hash_ctx.update(data); + let digest = hash_ctx.finish(); + let tarball_checksum = 
base64::encode(digest.as_ref()).to_lowercase(); + (tarball_checksum, base64_hash.to_lowercase()) + } + NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(hex) => { + let mut hash_ctx = Context::new(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY); + hash_ctx.update(data); + let digest = hash_ctx.finish(); + let tarball_checksum = hex::encode(digest.as_ref()).to_lowercase(); + (tarball_checksum, hex.to_lowercase()) + } + NpmPackageVersionDistInfoIntegrity::UnknownIntegrity(integrity) => { + bail!( + "Not implemented integrity kind for {}: {}", + package, + integrity + ) + } + }; + + if tarball_checksum != expected_checksum { + bail!( + "Tarball checksum did not match what was provided by npm registry for {}.\n\nExpected: {}\nActual: {}", + package, + expected_checksum, + tarball_checksum, + ) + } + Ok(()) +} + +fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> { + fs::create_dir_all(output_folder)?; + let output_folder = fs::canonicalize(output_folder)?; + let tar = GzDecoder::new(data); + let mut archive = Archive::new(tar); + archive.set_overwrite(true); + archive.set_preserve_permissions(true); + let mut created_dirs = HashSet::new(); + + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + let entry_type = entry.header().entry_type(); + + // Some package tarballs contain "pax_global_header", these entries + // should be skipped. 
+ if entry_type == EntryType::XGlobalHeader { + continue; + } + + // skip the first component which will be either "package" or the name of the package + let relative_path = path.components().skip(1).collect::(); + let absolute_path = output_folder.join(relative_path); + let dir_path = if entry_type == EntryType::Directory { + absolute_path.as_path() + } else { + absolute_path.parent().unwrap() + }; + if created_dirs.insert(dir_path.to_path_buf()) { + fs::create_dir_all(dir_path)?; + let canonicalized_dir = fs::canonicalize(dir_path)?; + if !canonicalized_dir.starts_with(&output_folder) { + bail!( + "Extracted directory '{}' of npm tarball was not in output directory.", + canonicalized_dir.display() + ) + } + } + + let entry_type = entry.header().entry_type(); + match entry_type { + EntryType::Regular => { + entry.unpack(&absolute_path)?; + } + EntryType::Symlink | EntryType::Link => { + // At the moment, npm doesn't seem to support uploading hardlinks or + // symlinks to the npm registry. If ever adding symlink or hardlink + // support, we will need to validate that the hardlink and symlink + // target are within the package directory. 
+ log::warn!( + "Ignoring npm tarball entry type {:?} for '{}'", + entry_type, + absolute_path.display() + ) + } + _ => { + // ignore + } + } + } + Ok(()) +} + +#[cfg(test)] +mod test { + use deno_semver::Version; + + use super::*; + + #[test] + pub fn test_verify_tarball() { + let package = PackageNv { + name: "package".to_string(), + version: Version::parse_from_npm("1.0.0").unwrap(), + }; + let actual_checksum = + "z4phnx7vul3xvchq1m2ab9yg5aulvxxcg/spidns6c5h0ne8xyxysp+dgnkhfuwvy7kxvudbeoglodj6+sfapg=="; + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::UnknownIntegrity("test") + ) + .unwrap_err() + .to_string(), + "Not implemented integrity kind for package@1.0.0: test", + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "notimplemented", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + "Not implemented hash function for package@1.0.0: notimplemented", + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha1", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + concat!( + "Tarball checksum did not match what was provided by npm ", + "registry for package@1.0.0.\n\nExpected: test\nActual: 2jmj7l5rsw0yvb/vlwaykk/ybwk=", + ), + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha512", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_checksum}"), + ); + assert!(verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha512", + base64_hash: actual_checksum, + }, + ) + .is_ok()); + let actual_hex = 
"da39a3ee5e6b4b0d3255bfef95601890afd80709"; + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex("test"), + ) + .unwrap_err() + .to_string(), + format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_hex}"), + ); + assert!(verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(actual_hex), + ) + .is_ok()); + } +} diff --git a/cli/npm/mod.rs b/cli/npm/mod.rs index 114bf15f2..22997a8b2 100644 --- a/cli/npm/mod.rs +++ b/cli/npm/mod.rs @@ -1,12 +1,8 @@ // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. +mod cache_dir; mod managed; -// todo(#18967): move the cache, registry, and tarball into the managed folder -mod cache; -mod registry; -mod tarball; - use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; @@ -17,20 +13,45 @@ use deno_core::url::Url; use deno_graph::NpmPackageReqResolution; use deno_npm::resolution::PackageReqNotFoundError; use deno_runtime::deno_node::NpmResolver; - -pub use cache::NpmCache; -pub use cache::NpmCacheDir; use deno_semver::npm::NpmPackageNvReference; use deno_semver::npm::NpmPackageReqReference; use deno_semver::package::PackageNv; use deno_semver::package::PackageReq; -pub use managed::create_npm_fs_resolver; -pub use managed::ManagedCliNpmResolver; -pub use managed::NpmPackageFsResolver; -pub use managed::NpmProcessState; -pub use managed::NpmResolution; -pub use managed::PackageJsonDepsInstaller; -pub use registry::CliNpmRegistryApi; + +pub use self::cache_dir::NpmCacheDir; +pub use self::managed::CliNpmResolverManagedCreateOptions; +pub use self::managed::CliNpmResolverManagedPackageJsonInstallerOption; +pub use self::managed::CliNpmResolverManagedSnapshotOption; +pub use self::managed::ManagedCliNpmResolver; + +pub enum CliNpmResolverCreateOptions { + Managed(CliNpmResolverManagedCreateOptions), + // 
todo(dsherret): implement this + #[allow(dead_code)] + Byonm, +} + +pub async fn create_cli_npm_resolver_for_lsp( + options: CliNpmResolverCreateOptions, +) -> Arc { + use CliNpmResolverCreateOptions::*; + match options { + Managed(options) => { + managed::create_managed_npm_resolver_for_lsp(options).await + } + Byonm => todo!(), + } +} + +pub async fn create_cli_npm_resolver( + options: CliNpmResolverCreateOptions, +) -> Result, AnyError> { + use CliNpmResolverCreateOptions::*; + match options { + Managed(options) => managed::create_managed_npm_resolver(options).await, + Byonm => todo!(), + } +} pub enum InnerCliNpmResolverRef<'a> { Managed(&'a ManagedCliNpmResolver), @@ -41,6 +62,8 @@ pub enum InnerCliNpmResolverRef<'a> { pub trait CliNpmResolver: NpmResolver { fn into_npm_resolver(self: Arc) -> Arc; + fn clone_snapshotted(&self) -> Arc; + fn root_dir_url(&self) -> &Url; fn as_inner(&self) -> InnerCliNpmResolverRef; diff --git a/cli/npm/registry.rs b/cli/npm/registry.rs deleted file mode 100644 index 61eb4123d..000000000 --- a/cli/npm/registry.rs +++ /dev/null @@ -1,381 +0,0 @@ -// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. 
- -use std::collections::HashMap; -use std::collections::HashSet; -use std::fs; -use std::io::ErrorKind; -use std::path::PathBuf; -use std::sync::Arc; - -use async_trait::async_trait; -use deno_core::anyhow::anyhow; -use deno_core::anyhow::Context; -use deno_core::error::custom_error; -use deno_core::error::AnyError; -use deno_core::futures::future::BoxFuture; -use deno_core::futures::future::Shared; -use deno_core::futures::FutureExt; -use deno_core::parking_lot::Mutex; -use deno_core::serde_json; -use deno_core::url::Url; -use deno_npm::registry::NpmPackageInfo; -use deno_npm::registry::NpmRegistryApi; -use deno_npm::registry::NpmRegistryPackageInfoLoadError; -use once_cell::sync::Lazy; - -use crate::args::CacheSetting; -use crate::cache::CACHE_PERM; -use crate::http_util::HttpClient; -use crate::util::fs::atomic_write_file; -use crate::util::progress_bar::ProgressBar; -use crate::util::sync::AtomicFlag; - -use super::cache::NpmCache; - -static NPM_REGISTRY_DEFAULT_URL: Lazy = Lazy::new(|| { - let env_var_name = "NPM_CONFIG_REGISTRY"; - if let Ok(registry_url) = std::env::var(env_var_name) { - // ensure there is a trailing slash for the directory - let registry_url = format!("{}/", registry_url.trim_end_matches('/')); - match Url::parse(®istry_url) { - Ok(url) => { - return url; - } - Err(err) => { - log::debug!("Invalid {} environment variable: {:#}", env_var_name, err,); - } - } - } - - Url::parse("https://registry.npmjs.org").unwrap() -}); - -#[derive(Debug)] -pub struct CliNpmRegistryApi(Option>); - -impl CliNpmRegistryApi { - pub fn default_url() -> &'static Url { - &NPM_REGISTRY_DEFAULT_URL - } - - pub fn new( - base_url: Url, - cache: Arc, - http_client: Arc, - progress_bar: ProgressBar, - ) -> Self { - Self(Some(Arc::new(CliNpmRegistryApiInner { - base_url, - cache, - force_reload_flag: Default::default(), - mem_cache: Default::default(), - previously_reloaded_packages: Default::default(), - http_client, - progress_bar, - }))) - } - - /// Clears the 
internal memory cache. - pub fn clear_memory_cache(&self) { - self.inner().clear_memory_cache(); - } - - pub fn get_cached_package_info( - &self, - name: &str, - ) -> Option> { - self.inner().get_cached_package_info(name) - } - - pub fn base_url(&self) -> &Url { - &self.inner().base_url - } - - fn inner(&self) -> &Arc { - // this panicking indicates a bug in the code where this - // wasn't initialized - self.0.as_ref().unwrap() - } -} - -#[async_trait] -impl NpmRegistryApi for CliNpmRegistryApi { - async fn package_info( - &self, - name: &str, - ) -> Result, NpmRegistryPackageInfoLoadError> { - match self.inner().maybe_package_info(name).await { - Ok(Some(info)) => Ok(info), - Ok(None) => Err(NpmRegistryPackageInfoLoadError::PackageNotExists { - package_name: name.to_string(), - }), - Err(err) => { - Err(NpmRegistryPackageInfoLoadError::LoadError(Arc::new(err))) - } - } - } - - fn mark_force_reload(&self) -> bool { - // never force reload the registry information if reloading - // is disabled or if we're already reloading - if matches!( - self.inner().cache.cache_setting(), - CacheSetting::Only | CacheSetting::ReloadAll - ) { - return false; - } - if self.inner().force_reload_flag.raise() { - self.clear_memory_cache(); // clear the cache to force reloading - true - } else { - false - } - } -} - -type CacheItemPendingResult = - Result>, Arc>; - -#[derive(Debug)] -enum CacheItem { - Pending(Shared>), - Resolved(Option>), -} - -#[derive(Debug)] -struct CliNpmRegistryApiInner { - base_url: Url, - cache: Arc, - force_reload_flag: AtomicFlag, - mem_cache: Mutex>, - previously_reloaded_packages: Mutex>, - http_client: Arc, - progress_bar: ProgressBar, -} - -impl CliNpmRegistryApiInner { - pub async fn maybe_package_info( - self: &Arc, - name: &str, - ) -> Result>, AnyError> { - let (created, future) = { - let mut mem_cache = self.mem_cache.lock(); - match mem_cache.get(name) { - Some(CacheItem::Resolved(maybe_info)) => { - return Ok(maybe_info.clone()); - } - 
Some(CacheItem::Pending(future)) => (false, future.clone()), - None => { - if (self.cache.cache_setting().should_use_for_npm_package(name) && !self.force_reload()) - // if this has been previously reloaded, then try loading from the - // file system cache - || !self.previously_reloaded_packages.lock().insert(name.to_string()) - { - // attempt to load from the file cache - if let Some(info) = self.load_file_cached_package_info(name) { - let result = Some(Arc::new(info)); - mem_cache - .insert(name.to_string(), CacheItem::Resolved(result.clone())); - return Ok(result); - } - } - - let future = { - let api = self.clone(); - let name = name.to_string(); - async move { - api - .load_package_info_from_registry(&name) - .await - .map(|info| info.map(Arc::new)) - .map_err(Arc::new) - } - .boxed() - .shared() - }; - mem_cache - .insert(name.to_string(), CacheItem::Pending(future.clone())); - (true, future) - } - } - }; - - if created { - match future.await { - Ok(maybe_info) => { - // replace the cache item to say it's resolved now - self - .mem_cache - .lock() - .insert(name.to_string(), CacheItem::Resolved(maybe_info.clone())); - Ok(maybe_info) - } - Err(err) => { - // purge the item from the cache so it loads next time - self.mem_cache.lock().remove(name); - Err(anyhow!("{:#}", err)) - } - } - } else { - Ok(future.await.map_err(|err| anyhow!("{:#}", err))?) 
- } - } - - fn force_reload(&self) -> bool { - self.force_reload_flag.is_raised() - } - - fn load_file_cached_package_info( - &self, - name: &str, - ) -> Option { - match self.load_file_cached_package_info_result(name) { - Ok(value) => value, - Err(err) => { - if cfg!(debug_assertions) { - panic!("error loading cached npm package info for {name}: {err:#}"); - } else { - None - } - } - } - } - - fn load_file_cached_package_info_result( - &self, - name: &str, - ) -> Result, AnyError> { - let file_cache_path = self.get_package_file_cache_path(name); - let file_text = match fs::read_to_string(file_cache_path) { - Ok(file_text) => file_text, - Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), - Err(err) => return Err(err.into()), - }; - match serde_json::from_str(&file_text) { - Ok(package_info) => Ok(Some(package_info)), - Err(err) => { - // This scenario might mean we need to load more data from the - // npm registry than before. So, just debug log while in debug - // rather than panic. - log::debug!( - "error deserializing registry.json for '{}'. Reloading. 
{:?}", - name, - err - ); - Ok(None) - } - } - } - - fn save_package_info_to_file_cache( - &self, - name: &str, - package_info: &NpmPackageInfo, - ) { - if let Err(err) = - self.save_package_info_to_file_cache_result(name, package_info) - { - if cfg!(debug_assertions) { - panic!("error saving cached npm package info for {name}: {err:#}"); - } - } - } - - fn save_package_info_to_file_cache_result( - &self, - name: &str, - package_info: &NpmPackageInfo, - ) -> Result<(), AnyError> { - let file_cache_path = self.get_package_file_cache_path(name); - let file_text = serde_json::to_string(&package_info)?; - atomic_write_file(&file_cache_path, file_text, CACHE_PERM)?; - Ok(()) - } - - async fn load_package_info_from_registry( - &self, - name: &str, - ) -> Result, AnyError> { - self - .load_package_info_from_registry_inner(name) - .await - .with_context(|| { - format!( - "Error getting response at {} for package \"{}\"", - self.get_package_url(name), - name - ) - }) - } - - async fn load_package_info_from_registry_inner( - &self, - name: &str, - ) -> Result, AnyError> { - if *self.cache.cache_setting() == CacheSetting::Only { - return Err(custom_error( - "NotCached", - format!( - "An npm specifier not found in cache: \"{name}\", --cached-only is specified." 
- ) - )); - } - - let package_url = self.get_package_url(name); - let guard = self.progress_bar.update(package_url.as_str()); - - let maybe_bytes = self - .http_client - .download_with_progress(package_url, &guard) - .await?; - match maybe_bytes { - Some(bytes) => { - let package_info = serde_json::from_slice(&bytes)?; - self.save_package_info_to_file_cache(name, &package_info); - Ok(Some(package_info)) - } - None => Ok(None), - } - } - - fn get_package_url(&self, name: &str) -> Url { - // list of all characters used in npm packages: - // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ - const ASCII_SET: percent_encoding::AsciiSet = - percent_encoding::NON_ALPHANUMERIC - .remove(b'!') - .remove(b'\'') - .remove(b'(') - .remove(b')') - .remove(b'*') - .remove(b'-') - .remove(b'.') - .remove(b'/') - .remove(b'@') - .remove(b'_') - .remove(b'~'); - let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); - self.base_url.join(&name.to_string()).unwrap() - } - - fn get_package_file_cache_path(&self, name: &str) -> PathBuf { - let name_folder_path = self.cache.package_name_folder(name, &self.base_url); - name_folder_path.join("registry.json") - } - - fn clear_memory_cache(&self) { - self.mem_cache.lock().clear(); - } - - pub fn get_cached_package_info( - &self, - name: &str, - ) -> Option> { - let mem_cache = self.mem_cache.lock(); - if let Some(CacheItem::Resolved(maybe_info)) = mem_cache.get(name) { - maybe_info.clone() - } else { - None - } - } -} diff --git a/cli/npm/tarball.rs b/cli/npm/tarball.rs deleted file mode 100644 index e72b1afc8..000000000 --- a/cli/npm/tarball.rs +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. 
- -use std::collections::HashSet; -use std::fs; -use std::path::Path; -use std::path::PathBuf; - -use deno_core::anyhow::bail; -use deno_core::error::AnyError; -use deno_npm::registry::NpmPackageVersionDistInfo; -use deno_npm::registry::NpmPackageVersionDistInfoIntegrity; -use deno_semver::package::PackageNv; -use flate2::read::GzDecoder; -use tar::Archive; -use tar::EntryType; - -use super::cache::with_folder_sync_lock; - -pub fn verify_and_extract_tarball( - package: &PackageNv, - data: &[u8], - dist_info: &NpmPackageVersionDistInfo, - output_folder: &Path, -) -> Result<(), AnyError> { - verify_tarball_integrity(package, data, &dist_info.integrity())?; - - with_folder_sync_lock(package, output_folder, || { - extract_tarball(data, output_folder) - }) -} - -fn verify_tarball_integrity( - package: &PackageNv, - data: &[u8], - npm_integrity: &NpmPackageVersionDistInfoIntegrity, -) -> Result<(), AnyError> { - use ring::digest::Context; - let (tarball_checksum, expected_checksum) = match npm_integrity { - NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm, - base64_hash, - } => { - let algo = match *algorithm { - "sha512" => &ring::digest::SHA512, - "sha1" => &ring::digest::SHA1_FOR_LEGACY_USE_ONLY, - hash_kind => bail!( - "Not implemented hash function for {}: {}", - package, - hash_kind - ), - }; - let mut hash_ctx = Context::new(algo); - hash_ctx.update(data); - let digest = hash_ctx.finish(); - let tarball_checksum = base64::encode(digest.as_ref()).to_lowercase(); - (tarball_checksum, base64_hash.to_lowercase()) - } - NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(hex) => { - let mut hash_ctx = Context::new(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY); - hash_ctx.update(data); - let digest = hash_ctx.finish(); - let tarball_checksum = hex::encode(digest.as_ref()).to_lowercase(); - (tarball_checksum, hex.to_lowercase()) - } - NpmPackageVersionDistInfoIntegrity::UnknownIntegrity(integrity) => { - bail!( - "Not implemented integrity kind for {}: {}", - 
package, - integrity - ) - } - }; - - if tarball_checksum != expected_checksum { - bail!( - "Tarball checksum did not match what was provided by npm registry for {}.\n\nExpected: {}\nActual: {}", - package, - expected_checksum, - tarball_checksum, - ) - } - Ok(()) -} - -fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> { - fs::create_dir_all(output_folder)?; - let output_folder = fs::canonicalize(output_folder)?; - let tar = GzDecoder::new(data); - let mut archive = Archive::new(tar); - archive.set_overwrite(true); - archive.set_preserve_permissions(true); - let mut created_dirs = HashSet::new(); - - for entry in archive.entries()? { - let mut entry = entry?; - let path = entry.path()?; - let entry_type = entry.header().entry_type(); - - // Some package tarballs contain "pax_global_header", these entries - // should be skipped. - if entry_type == EntryType::XGlobalHeader { - continue; - } - - // skip the first component which will be either "package" or the name of the package - let relative_path = path.components().skip(1).collect::(); - let absolute_path = output_folder.join(relative_path); - let dir_path = if entry_type == EntryType::Directory { - absolute_path.as_path() - } else { - absolute_path.parent().unwrap() - }; - if created_dirs.insert(dir_path.to_path_buf()) { - fs::create_dir_all(dir_path)?; - let canonicalized_dir = fs::canonicalize(dir_path)?; - if !canonicalized_dir.starts_with(&output_folder) { - bail!( - "Extracted directory '{}' of npm tarball was not in output directory.", - canonicalized_dir.display() - ) - } - } - - let entry_type = entry.header().entry_type(); - match entry_type { - EntryType::Regular => { - entry.unpack(&absolute_path)?; - } - EntryType::Symlink | EntryType::Link => { - // At the moment, npm doesn't seem to support uploading hardlinks or - // symlinks to the npm registry. 
If ever adding symlink or hardlink - // support, we will need to validate that the hardlink and symlink - // target are within the package directory. - log::warn!( - "Ignoring npm tarball entry type {:?} for '{}'", - entry_type, - absolute_path.display() - ) - } - _ => { - // ignore - } - } - } - Ok(()) -} - -#[cfg(test)] -mod test { - use deno_semver::Version; - - use super::*; - - #[test] - pub fn test_verify_tarball() { - let package = PackageNv { - name: "package".to_string(), - version: Version::parse_from_npm("1.0.0").unwrap(), - }; - let actual_checksum = - "z4phnx7vul3xvchq1m2ab9yg5aulvxxcg/spidns6c5h0ne8xyxysp+dgnkhfuwvy7kxvudbeoglodj6+sfapg=="; - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::UnknownIntegrity("test") - ) - .unwrap_err() - .to_string(), - "Not implemented integrity kind for package@1.0.0: test", - ); - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm: "notimplemented", - base64_hash: "test" - } - ) - .unwrap_err() - .to_string(), - "Not implemented hash function for package@1.0.0: notimplemented", - ); - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm: "sha1", - base64_hash: "test" - } - ) - .unwrap_err() - .to_string(), - concat!( - "Tarball checksum did not match what was provided by npm ", - "registry for package@1.0.0.\n\nExpected: test\nActual: 2jmj7l5rsw0yvb/vlwaykk/ybwk=", - ), - ); - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm: "sha512", - base64_hash: "test" - } - ) - .unwrap_err() - .to_string(), - format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_checksum}"), - ); - assert!(verify_tarball_integrity( - &package, - &Vec::new(), - 
&NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm: "sha512", - base64_hash: actual_checksum, - }, - ) - .is_ok()); - let actual_hex = "da39a3ee5e6b4b0d3255bfef95601890afd80709"; - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex("test"), - ) - .unwrap_err() - .to_string(), - format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_hex}"), - ); - assert!(verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(actual_hex), - ) - .is_ok()); - } -} -- cgit v1.2.3