From b1f776adef6f0d0caa0b2badf9fb707cf5efa6e7 Mon Sep 17 00:00:00 2001 From: David Sherret Date: Sun, 2 Jun 2024 21:39:13 -0400 Subject: refactor: extract structs for downloading tarballs and npm registry packuments (#24067) --- cli/npm/cache_dir.rs | 4 +- cli/npm/managed/cache.rs | 309 ------------------------- cli/npm/managed/cache/mod.rs | 254 +++++++++++++++++++++ cli/npm/managed/cache/registry_info.rs | 284 +++++++++++++++++++++++ cli/npm/managed/cache/tarball.rs | 210 +++++++++++++++++ cli/npm/managed/cache/tarball_extract.rs | 324 +++++++++++++++++++++++++++ cli/npm/managed/mod.rs | 127 ++++++----- cli/npm/managed/registry.rs | 181 ++------------- cli/npm/managed/resolvers/common.rs | 28 ++- cli/npm/managed/resolvers/global.rs | 26 ++- cli/npm/managed/resolvers/local.rs | 62 ++--- cli/npm/managed/resolvers/mod.rs | 19 +- cli/npm/managed/tarball.rs | 324 --------------------------- tests/integration/npm_tests.rs | 6 - tests/testdata/npm/deno_run_non_existent.out | 1 - 15 files changed, 1246 insertions(+), 913 deletions(-) delete mode 100644 cli/npm/managed/cache.rs create mode 100644 cli/npm/managed/cache/mod.rs create mode 100644 cli/npm/managed/cache/registry_info.rs create mode 100644 cli/npm/managed/cache/tarball.rs create mode 100644 cli/npm/managed/cache/tarball_extract.rs delete mode 100644 cli/npm/managed/tarball.rs diff --git a/cli/npm/cache_dir.rs b/cli/npm/cache_dir.rs index d51913775..1f1f5e956 100644 --- a/cli/npm/cache_dir.rs +++ b/cli/npm/cache_dir.rs @@ -72,7 +72,7 @@ impl NpmCacheDir { registry_url: &Url, ) -> PathBuf { if folder_id.copy_index == 0 { - self.package_folder_for_name_and_version(&folder_id.nv, registry_url) + self.package_folder_for_nv(&folder_id.nv, registry_url) } else { self .package_name_folder(&folder_id.nv.name, registry_url) @@ -80,7 +80,7 @@ impl NpmCacheDir { } } - pub fn package_folder_for_name_and_version( + pub fn package_folder_for_nv( &self, package: &PackageNv, registry_url: &Url, diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs deleted file mode 100644 index 4056c97ad..000000000 --- a/cli/npm/managed/cache.rs +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. - -use std::collections::HashSet; -use std::fs; -use std::path::Path; -use std::path::PathBuf; -use std::sync::Arc; - -use deno_ast::ModuleSpecifier; -use deno_core::anyhow::bail; -use deno_core::anyhow::Context; -use deno_core::error::custom_error; -use deno_core::error::AnyError; -use deno_core::parking_lot::Mutex; -use deno_core::url::Url; -use deno_npm::npm_rc::ResolvedNpmRc; -use deno_npm::registry::NpmPackageVersionDistInfo; -use deno_npm::NpmPackageCacheFolderId; -use deno_runtime::deno_fs; -use deno_semver::package::PackageNv; - -use crate::args::CacheSetting; -use crate::http_util::HttpClient; -use crate::npm::common::maybe_auth_header_for_npm_registry; -use crate::npm::NpmCacheDir; -use crate::util::fs::hard_link_dir_recursive; -use crate::util::progress_bar::ProgressBar; - -use super::tarball::verify_and_extract_tarball; -use super::tarball::TarballExtractionMode; - -/// Stores a single copy of npm packages in a cache. 
-#[derive(Debug)] -pub struct NpmCache { - cache_dir: NpmCacheDir, - cache_setting: CacheSetting, - fs: Arc, - http_client: Arc, - progress_bar: ProgressBar, - pub(crate) npmrc: Arc, - /// ensures a package is only downloaded once per run - previously_reloaded_packages: Mutex>, -} - -impl NpmCache { - pub fn new( - cache_dir: NpmCacheDir, - cache_setting: CacheSetting, - fs: Arc, - http_client: Arc, - progress_bar: ProgressBar, - npmrc: Arc, - ) -> Self { - Self { - cache_dir, - cache_setting, - fs, - http_client, - progress_bar, - previously_reloaded_packages: Default::default(), - npmrc, - } - } - - pub fn cache_setting(&self) -> &CacheSetting { - &self.cache_setting - } - - pub fn root_dir_url(&self) -> &Url { - self.cache_dir.root_dir_url() - } - - /// Checks if the cache should be used for the provided name and version. - /// NOTE: Subsequent calls for the same package will always return `true` - /// to ensure a package is only downloaded once per run of the CLI. This - /// prevents downloads from re-occurring when someone has `--reload` and - /// and imports a dynamic import that imports the same package again for example. - fn should_use_cache_for_package(&self, package: &PackageNv) -> bool { - self.cache_setting.should_use_for_npm_package(&package.name) - || !self - .previously_reloaded_packages - .lock() - .insert(package.clone()) - } - - pub async fn ensure_package( - &self, - package: &PackageNv, - dist: &NpmPackageVersionDistInfo, - ) -> Result<(), AnyError> { - self - .ensure_package_inner(package, dist) - .await - .with_context(|| format!("Failed caching npm package '{package}'.")) - } - - async fn ensure_package_inner( - &self, - package_nv: &PackageNv, - dist: &NpmPackageVersionDistInfo, - ) -> Result<(), AnyError> { - let registry_url = self.npmrc.get_registry_url(&package_nv.name); - let registry_config = self.npmrc.get_registry_config(&package_nv.name); - - let package_folder = self - .cache_dir - .package_folder_for_name_and_version(package_nv, registry_url); - let should_use_cache = self.should_use_cache_for_package(package_nv); - let package_folder_exists = self.fs.exists_sync(&package_folder); - if should_use_cache && package_folder_exists { - return Ok(()); - } else if self.cache_setting == CacheSetting::Only { - return Err(custom_error( - "NotCached", - format!( - "An npm specifier not found in cache: \"{}\", --cached-only is specified.", - &package_nv.name - ) - ) - ); - } - - if dist.tarball.is_empty() { - bail!("Tarball URL was empty."); - } - - let maybe_auth_header = maybe_auth_header_for_npm_registry(registry_config); - - let guard = self.progress_bar.update(&dist.tarball); - let maybe_bytes = self - .http_client - .download_with_progress(&dist.tarball, maybe_auth_header, &guard) - .await?; - match maybe_bytes { - Some(bytes) => { - let extraction_mode = if should_use_cache || !package_folder_exists { - TarballExtractionMode::SiblingTempDir - } else { - // The user ran with `--reload`, so overwrite the package instead of - // deleting it since the package might get corrupted if a user kills - // their deno process while it's deleting a package directory - // - // We can't rename this folder and delete it because the folder - // may be in use by another process or may now contain hardlinks, - // which will cause windows to throw an "AccessDenied" error when - // renaming. So we settle for overwriting. 
- TarballExtractionMode::Overwrite - }; - let dist = dist.clone(); - let package_nv = package_nv.clone(); - deno_core::unsync::spawn_blocking(move || { - verify_and_extract_tarball( - &package_nv, - &bytes, - &dist, - &package_folder, - extraction_mode, - ) - }) - .await? - } - None => { - bail!("Could not find npm package tarball at: {}", dist.tarball); - } - } - } - - /// Ensures a copy of the package exists in the global cache. - /// - /// This assumes that the original package folder being hard linked - /// from exists before this is called. - pub fn ensure_copy_package( - &self, - folder_id: &NpmPackageCacheFolderId, - ) -> Result<(), AnyError> { - let registry_url = self.npmrc.get_registry_url(&folder_id.nv.name); - assert_ne!(folder_id.copy_index, 0); - let package_folder = self - .cache_dir - .package_folder_for_id(folder_id, registry_url); - - if package_folder.exists() - // if this file exists, then the package didn't successfully initialize - // the first time, or another process is currently extracting the zip file - && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists() - && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name) - { - return Ok(()); - } - - let original_package_folder = self - .cache_dir - .package_folder_for_name_and_version(&folder_id.nv, registry_url); - - // it seems Windows does an "AccessDenied" error when moving a - // directory with hard links, so that's why this solution is done - with_folder_sync_lock(&folder_id.nv, &package_folder, || { - hard_link_dir_recursive(&original_package_folder, &package_folder) - })?; - Ok(()) - } - - pub fn package_folder_for_id(&self, id: &NpmPackageCacheFolderId) -> PathBuf { - let registry_url = self.npmrc.get_registry_url(&id.nv.name); - self.cache_dir.package_folder_for_id(id, registry_url) - } - - pub fn package_folder_for_name_and_version( - &self, - package: &PackageNv, - ) -> PathBuf { - let registry_url = self.npmrc.get_registry_url(&package.name); - self - .cache_dir - .package_folder_for_name_and_version(package, registry_url) - } - - pub fn package_name_folder(&self, name: &str) -> PathBuf { - let registry_url = self.npmrc.get_registry_url(name); - self.cache_dir.package_name_folder(name, registry_url) - } - - pub fn root_folder(&self) -> PathBuf { - self.cache_dir.root_dir().to_owned() - } - - pub fn resolve_package_folder_id_from_specifier( - &self, - specifier: &ModuleSpecifier, - ) -> Option { - self - .cache_dir - .resolve_package_folder_id_from_specifier(specifier) - } -} - -const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock"; - -fn with_folder_sync_lock( - package: &PackageNv, - output_folder: &Path, - action: impl FnOnce() -> Result<(), AnyError>, -) -> Result<(), AnyError> { - fn inner( - output_folder: &Path, - action: impl FnOnce() -> Result<(), AnyError>, - ) -> Result<(), AnyError> { - fs::create_dir_all(output_folder).with_context(|| { - format!("Error creating '{}'.", output_folder.display()) - })?; - - // This sync lock file is a way to ensure that partially created - // npm package directories aren't considered valid. This could maybe - // be a bit smarter in the future to not bother extracting here - // if another process has taken the lock in the past X seconds and - // wait for the other process to finish (it could try to create the - // file with `create_new(true)` then if it exists, check the metadata - // then wait until the other process finishes with a timeout), but - // for now this is good enough. 
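The comment above (kept verbatim in the new cache/mod.rs) sketches, but does not implement, a smarter locking scheme: take the lock with `create_new(true)` and, when the file already exists, inspect its age before deciding whether to wait or re-extract. A minimal sketch of that idea using only std APIs; the `try_acquire_sync_lock` helper and the 30-second staleness window are assumptions, not part of this change:

use std::fs;
use std::io::ErrorKind;
use std::path::Path;
use std::time::{Duration, SystemTime};

// Assumed staleness window; not part of this change.
const LOCK_STALE_AFTER: Duration = Duration::from_secs(30);

/// Returns Ok(true) when the caller should extract: either it created the
/// lock file itself, or an existing lock looks stale (its owner likely died).
/// Returns Ok(false) when another process holds a fresh lock.
fn try_acquire_sync_lock(sync_lock_path: &Path) -> std::io::Result<bool> {
  match fs::OpenOptions::new()
    .write(true)
    .create_new(true) // fails with AlreadyExists instead of truncating
    .open(sync_lock_path)
  {
    Ok(_) => Ok(true),
    Err(err) if err.kind() == ErrorKind::AlreadyExists => {
      let modified = fs::metadata(sync_lock_path)?.modified()?;
      let age = SystemTime::now().duration_since(modified).unwrap_or_default();
      Ok(age > LOCK_STALE_AFTER)
    }
    Err(err) => Err(err),
  }
}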
-    let sync_lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME);
-    match fs::OpenOptions::new()
-      .write(true)
-      .create(true)
-      .truncate(false)
-      .open(&sync_lock_path)
-    {
-      Ok(_) => {
-        action()?;
-        // extraction succeeded, so only now delete this file
-        let _ignore = std::fs::remove_file(&sync_lock_path);
-        Ok(())
-      }
-      Err(err) => {
-        bail!(
-          concat!(
-            "Error creating package sync lock file at '{}'. ",
-            "Maybe try manually deleting this folder.\n\n{:#}",
-          ),
-          output_folder.display(),
-          err
-        );
-      }
-    }
-  }
-
-  match inner(output_folder, action) {
-    Ok(()) => Ok(()),
-    Err(err) => {
-      if let Err(remove_err) = fs::remove_dir_all(output_folder) {
-        if remove_err.kind() != std::io::ErrorKind::NotFound {
-          bail!(
-            concat!(
-              "Failed setting up package cache directory for {}, then ",
-              "failed cleaning it up.\n\nOriginal error:\n\n{}\n\n",
-              "Remove error:\n\n{}\n\nPlease manually ",
-              "delete this folder or you will run into issues using this ",
-              "package in the future:\n\n{}"
-            ),
-            package,
-            err,
-            remove_err,
-            output_folder.display(),
-          );
-        }
-      }
-      Err(err)
-    }
-  }
-}
diff --git a/cli/npm/managed/cache/mod.rs b/cli/npm/managed/cache/mod.rs
new file mode 100644
index 000000000..f409744b9
--- /dev/null
+++ b/cli/npm/managed/cache/mod.rs
@@ -0,0 +1,254 @@
+// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+
+use std::collections::HashSet;
+use std::fs;
+use std::io::ErrorKind;
+use std::path::Path;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use deno_ast::ModuleSpecifier;
+use deno_core::anyhow::bail;
+use deno_core::anyhow::Context;
+use deno_core::error::AnyError;
+use deno_core::parking_lot::Mutex;
+use deno_core::serde_json;
+use deno_core::url::Url;
+use deno_npm::npm_rc::ResolvedNpmRc;
+use deno_npm::registry::NpmPackageInfo;
+use deno_npm::NpmPackageCacheFolderId;
+use deno_semver::package::PackageNv;
+
+use crate::args::CacheSetting;
+use crate::cache::CACHE_PERM;
+use crate::npm::NpmCacheDir;
+use crate::util::fs::atomic_write_file_with_retries;
+use crate::util::fs::hard_link_dir_recursive;
+
+mod registry_info;
+mod tarball;
+mod tarball_extract;
+
+pub use registry_info::RegistryInfoDownloader;
+pub use tarball::TarballCache;
+
+/// Stores a single copy of npm packages in a cache.
+#[derive(Debug)]
+pub struct NpmCache {
+  cache_dir: NpmCacheDir,
+  cache_setting: CacheSetting,
+  npmrc: Arc<ResolvedNpmRc>,
+  /// ensures a package is only downloaded once per run
+  previously_reloaded_packages: Mutex<HashSet<PackageNv>>,
+}
+
+impl NpmCache {
+  pub fn new(
+    cache_dir: NpmCacheDir,
+    cache_setting: CacheSetting,
+    npmrc: Arc<ResolvedNpmRc>,
+  ) -> Self {
+    Self {
+      cache_dir,
+      cache_setting,
+      previously_reloaded_packages: Default::default(),
+      npmrc,
+    }
+  }
+
+  pub fn cache_setting(&self) -> &CacheSetting {
+    &self.cache_setting
+  }
+
+  pub fn root_dir_url(&self) -> &Url {
+    self.cache_dir.root_dir_url()
+  }
+
+  /// Checks if the cache should be used for the provided name and version.
+  /// NOTE: Subsequent calls for the same package will always return `true`
+  /// to ensure a package is only downloaded once per run of the CLI. This
+  /// prevents downloads from re-occurring when someone runs with `--reload`
+  /// and a dynamic import loads the same package again, for example.
+  pub fn should_use_cache_for_package(&self, package: &PackageNv) -> bool {
+    self.cache_setting.should_use_for_npm_package(&package.name)
+      || !self
+        .previously_reloaded_packages
+        .lock()
+        .insert(package.clone())
+  }
+
+  /// Ensures a copy of the package exists in the global cache.
+  ///
+  /// This assumes that the original package folder being hard linked
+  /// from exists before this is called.
+  pub fn ensure_copy_package(
+    &self,
+    folder_id: &NpmPackageCacheFolderId,
+  ) -> Result<(), AnyError> {
+    let registry_url = self.npmrc.get_registry_url(&folder_id.nv.name);
+    assert_ne!(folder_id.copy_index, 0);
+    let package_folder = self
+      .cache_dir
+      .package_folder_for_id(folder_id, registry_url);
+
+    if package_folder.exists()
+      // if this file exists, then the package didn't successfully initialize
+      // the first time, or another process is currently extracting the zip file
+      && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
+      && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name)
+    {
+      return Ok(());
+    }
+
+    let original_package_folder = self
+      .cache_dir
+      .package_folder_for_nv(&folder_id.nv, registry_url);
+
+    // it seems Windows throws an "AccessDenied" error when moving a
+    // directory with hard links, so that's why this solution is done
+    with_folder_sync_lock(&folder_id.nv, &package_folder, || {
+      hard_link_dir_recursive(&original_package_folder, &package_folder)
+    })?;
+    Ok(())
+  }
+
+  pub fn package_folder_for_id(&self, id: &NpmPackageCacheFolderId) -> PathBuf {
+    let registry_url = self.npmrc.get_registry_url(&id.nv.name);
+    self.cache_dir.package_folder_for_id(id, registry_url)
+  }
+
+  pub fn package_folder_for_nv(&self, package: &PackageNv) -> PathBuf {
+    let registry_url = self.npmrc.get_registry_url(&package.name);
+    self.package_folder_for_nv_and_url(package, registry_url)
+  }
+
+  pub fn package_folder_for_nv_and_url(
+    &self,
+    package: &PackageNv,
+    registry_url: &Url,
+  ) -> PathBuf {
+    self.cache_dir.package_folder_for_nv(package, registry_url)
+  }
+
+  pub fn package_name_folder(&self, name: &str) -> PathBuf {
+    let registry_url = self.npmrc.get_registry_url(name);
+    self.cache_dir.package_name_folder(name, registry_url)
+  }
+
+  pub fn root_folder(&self) -> PathBuf {
+    self.cache_dir.root_dir().to_owned()
+  }
+
+  pub fn resolve_package_folder_id_from_specifier(
+    &self,
+    specifier: &ModuleSpecifier,
+  ) -> Option<NpmPackageCacheFolderId> {
+    self
+      .cache_dir
+      .resolve_package_folder_id_from_specifier(specifier)
+  }
+
+  pub fn load_package_info(
+    &self,
+    name: &str,
+  ) -> Result<Option<NpmPackageInfo>, AnyError> {
+    let file_cache_path = self.get_registry_package_info_file_cache_path(name);
+
+    let file_text = match fs::read_to_string(file_cache_path) {
+      Ok(file_text) => file_text,
+      Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None),
+      Err(err) => return Err(err.into()),
+    };
+    Ok(serde_json::from_str(&file_text)?)
+ } + + pub fn save_package_info( + &self, + name: &str, + package_info: &NpmPackageInfo, + ) -> Result<(), AnyError> { + let file_cache_path = self.get_registry_package_info_file_cache_path(name); + let file_text = serde_json::to_string(&package_info)?; + atomic_write_file_with_retries(&file_cache_path, file_text, CACHE_PERM)?; + Ok(()) + } + + fn get_registry_package_info_file_cache_path(&self, name: &str) -> PathBuf { + let name_folder_path = self.package_name_folder(name); + name_folder_path.join("registry.json") + } +} + +const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock"; + +fn with_folder_sync_lock( + package: &PackageNv, + output_folder: &Path, + action: impl FnOnce() -> Result<(), AnyError>, +) -> Result<(), AnyError> { + fn inner( + output_folder: &Path, + action: impl FnOnce() -> Result<(), AnyError>, + ) -> Result<(), AnyError> { + fs::create_dir_all(output_folder).with_context(|| { + format!("Error creating '{}'.", output_folder.display()) + })?; + + // This sync lock file is a way to ensure that partially created + // npm package directories aren't considered valid. This could maybe + // be a bit smarter in the future to not bother extracting here + // if another process has taken the lock in the past X seconds and + // wait for the other process to finish (it could try to create the + // file with `create_new(true)` then if it exists, check the metadata + // then wait until the other process finishes with a timeout), but + // for now this is good enough. + let sync_lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME); + match fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(false) + .open(&sync_lock_path) + { + Ok(_) => { + action()?; + // extraction succeeded, so only now delete this file + let _ignore = std::fs::remove_file(&sync_lock_path); + Ok(()) + } + Err(err) => { + bail!( + concat!( + "Error creating package sync lock file at '{}'. ", + "Maybe try manually deleting this folder.\n\n{:#}", + ), + output_folder.display(), + err + ); + } + } + } + + match inner(output_folder, action) { + Ok(()) => Ok(()), + Err(err) => { + if let Err(remove_err) = fs::remove_dir_all(output_folder) { + if remove_err.kind() != std::io::ErrorKind::NotFound { + bail!( + concat!( + "Failed setting up package cache directory for {}, then ", + "failed cleaning it up.\n\nOriginal error:\n\n{}\n\n", + "Remove error:\n\n{}\n\nPlease manually ", + "delete this folder or you will run into issues using this ", + "package in the future:\n\n{}" + ), + package, + err, + remove_err, + output_folder.display(), + ); + } + } + Err(err) + } + } +} diff --git a/cli/npm/managed/cache/registry_info.rs b/cli/npm/managed/cache/registry_info.rs new file mode 100644 index 000000000..ea6b47969 --- /dev/null +++ b/cli/npm/managed/cache/registry_info.rs @@ -0,0 +1,284 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
+ +use std::collections::HashMap; +use std::sync::Arc; + +use deno_core::anyhow::anyhow; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; +use deno_core::parking_lot::Mutex; +use deno_core::serde_json; +use deno_core::url::Url; +use deno_npm::npm_rc::RegistryConfig; +use deno_npm::npm_rc::ResolvedNpmRc; +use deno_npm::registry::NpmPackageInfo; + +use crate::args::CacheSetting; +use crate::http_util::HttpClient; +use crate::npm::common::maybe_auth_header_for_npm_registry; +use crate::util::progress_bar::ProgressBar; + +use super::NpmCache; + +// todo(dsherret): create seams and unit test this + +#[derive(Debug, Clone)] +enum MemoryCacheItem { + /// The cache item hasn't loaded yet. + PendingFuture(Shared), + /// The item has loaded in the past and was stored in the file system cache. + /// There is no reason to request this package from the npm registry again + /// for the duration of execution. + FsCached, + /// An item is memory cached when it fails saving to the file system cache + /// or the package does not exist. + MemoryCached(Result>, Arc>), +} + +#[derive(Debug, Clone)] +enum FutureResult { + PackageNotExists, + SavedFsCache(Arc), + ErroredFsCache(Arc), +} + +type PendingRegistryLoadFuture = + BoxFuture<'static, Result>>; + +/// Downloads packuments from the npm registry. +/// +/// This is shared amongst all the workers. +#[derive(Debug)] +pub struct RegistryInfoDownloader { + cache: Arc, + npmrc: Arc, + progress_bar: ProgressBar, + memory_cache: Mutex>, +} + +impl RegistryInfoDownloader { + pub fn new( + cache: Arc, + npmrc: Arc, + progress_bar: ProgressBar, + ) -> Self { + Self { + cache, + npmrc, + progress_bar, + memory_cache: Default::default(), + } + } + + pub async fn load_package_info( + &self, + name: &str, + current_runtime_http_client: &Arc, + ) -> Result>, AnyError> { + let registry_url = self.npmrc.get_registry_url(name); + let registry_config = self.npmrc.get_registry_config(name); + + self + .load_package_info_inner( + name, + registry_url, + registry_config, + current_runtime_http_client, + ) + .await + .with_context(|| { + format!( + "Error getting response at {} for package \"{}\"", + self.get_package_url(name, registry_url), + name + ) + }) + } + + async fn load_package_info_inner( + &self, + name: &str, + registry_url: &Url, + registry_config: &RegistryConfig, + current_runtime_http_client: &Arc, + ) -> Result>, AnyError> { + if *self.cache.cache_setting() == CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{name}\", --cached-only is specified." 
+ ) + )); + } + + let (created, cache_item) = { + let mut mem_cache = self.memory_cache.lock(); + if let Some(cache_item) = mem_cache.get(name) { + (false, cache_item.clone()) + } else { + let future = self.create_load_future( + name, + registry_url, + registry_config, + current_runtime_http_client, + ); + let cache_item = MemoryCacheItem::PendingFuture(future); + mem_cache.insert(name.to_string(), cache_item.clone()); + (true, cache_item) + } + }; + match cache_item { + MemoryCacheItem::FsCached => { + // this struct previously loaded from the registry, so we can load it from the file system cache + self + .load_file_cached_package_info(name) + .await + .map(|info| Some(Arc::new(info))) + } + MemoryCacheItem::MemoryCached(maybe_info) => { + maybe_info.clone().map_err(|e| anyhow!("{}", e)) + } + MemoryCacheItem::PendingFuture(future) => { + if created { + match future.await { + Ok(FutureResult::SavedFsCache(info)) => { + // return back the future and mark this package as having + // been saved in the cache for next time it's requested + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::FsCached; + Ok(Some(info)) + } + Ok(FutureResult::ErroredFsCache(info)) => { + // since saving to the fs cache failed, keep the package information in memory + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Ok(Some(info.clone()))); + Ok(Some(info)) + } + Ok(FutureResult::PackageNotExists) => { + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Ok(None)); + Ok(None) + } + Err(err) => { + let return_err = anyhow!("{}", err); + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Err(err)); + Err(return_err) + } + } + } else { + match future.await { + Ok(FutureResult::SavedFsCache(info)) => Ok(Some(info)), + Ok(FutureResult::ErroredFsCache(info)) => Ok(Some(info)), + Ok(FutureResult::PackageNotExists) => Ok(None), + Err(err) => Err(anyhow!("{}", err)), + } + } + } + } + } + + async fn load_file_cached_package_info( + &self, + name: &str, + ) -> Result { + // this scenario failing should be exceptionally rare so let's + // deal with improving it only when anyone runs into an issue + let maybe_package_info = deno_core::unsync::spawn_blocking({ + let cache = self.cache.clone(); + let name = name.to_string(); + move || cache.load_package_info(&name) + }) + .await + .unwrap() + .with_context(|| { + format!( + "Previously saved '{}' from the npm cache, but now it fails to load.", + name + ) + })?; + match maybe_package_info { + Some(package_info) => Ok(package_info), + None => { + bail!("The package '{}' previously saved its registry information to the file system cache, but that file no longer exists.", name) + } + } + } + + fn create_load_future( + &self, + name: &str, + registry_url: &Url, + registry_config: &RegistryConfig, + current_runtime_http_client: &Arc, + ) -> Shared { + let package_url = self.get_package_url(name, registry_url); + let maybe_auth_header = maybe_auth_header_for_npm_registry(registry_config); + let guard = self.progress_bar.update(package_url.as_str()); + let cache = self.cache.clone(); + let http_client = current_runtime_http_client.clone(); + let name = name.to_string(); + // force this future to be polled on the current runtime because it's not + // safe to share `HttpClient`s across runtimes and because a restart of + // npm resolution might cause this package not to be resolved again + // causing the future to never be polled + deno_core::unsync::spawn(async move { + 
let maybe_bytes = http_client + .download_with_progress(package_url, maybe_auth_header, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + let future_result = deno_core::unsync::spawn_blocking( + move || -> Result { + let package_info = serde_json::from_slice(&bytes)?; + match cache.save_package_info(&name, &package_info) { + Ok(()) => { + Ok(FutureResult::SavedFsCache(Arc::new(package_info))) + } + Err(err) => { + log::debug!( + "Error saving package {} to cache: {:#}", + name, + err + ); + Ok(FutureResult::ErroredFsCache(Arc::new(package_info))) + } + } + }, + ) + .await??; + Ok(future_result) + } + None => Ok(FutureResult::PackageNotExists), + } + }) + .map(|result| result.unwrap().map_err(Arc::new)) + .boxed() + .shared() + } + + fn get_package_url(&self, name: &str, registry_url: &Url) -> Url { + // list of all characters used in npm packages: + // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ + const ASCII_SET: percent_encoding::AsciiSet = + percent_encoding::NON_ALPHANUMERIC + .remove(b'!') + .remove(b'\'') + .remove(b'(') + .remove(b')') + .remove(b'*') + .remove(b'-') + .remove(b'.') + .remove(b'/') + .remove(b'@') + .remove(b'_') + .remove(b'~'); + let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); + registry_url.join(&name.to_string()).unwrap() + } +} diff --git a/cli/npm/managed/cache/tarball.rs b/cli/npm/managed/cache/tarball.rs new file mode 100644 index 000000000..9848aca13 --- /dev/null +++ b/cli/npm/managed/cache/tarball.rs @@ -0,0 +1,210 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashMap; +use std::sync::Arc; + +use deno_core::anyhow::anyhow; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; +use deno_core::parking_lot::Mutex; +use deno_npm::npm_rc::ResolvedNpmRc; +use deno_npm::registry::NpmPackageVersionDistInfo; +use deno_runtime::deno_fs::FileSystem; +use deno_semver::package::PackageNv; + +use crate::args::CacheSetting; +use crate::http_util::HttpClient; +use crate::npm::common::maybe_auth_header_for_npm_registry; +use crate::util::progress_bar::ProgressBar; + +use super::tarball_extract::verify_and_extract_tarball; +use super::tarball_extract::TarballExtractionMode; +use super::NpmCache; + +// todo(dsherret): create seams and unit test this + +#[derive(Debug, Clone)] +enum MemoryCacheItem { + /// The cache item hasn't finished yet. + PendingFuture(Shared>>>), + /// The result errored. + Errored(Arc), + /// This package has already been cached. + Cached, +} + +/// Coordinates caching of tarballs being loaded from +/// the npm registry. +/// +/// This is shared amongst all the workers. 
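`RegistryInfoDownloader` above and `TarballCache` below deduplicate concurrent work the same way: the first caller for a key stores a `Shared` future in `memory_cache`, and every later caller clones and awaits that same future, so each packument or tarball is only fetched once. A stripped-down sketch of the pattern; the `Coalescer` type is illustrative, not part of this change:

use std::collections::HashMap;
use std::sync::{Arc, Mutex};

use futures::future::{BoxFuture, FutureExt, Shared};

type LoadResult = Result<Arc<str>, Arc<anyhow::Error>>;
type LoadFuture = Shared<BoxFuture<'static, LoadResult>>;

#[derive(Default)]
struct Coalescer {
  pending: Mutex<HashMap<String, LoadFuture>>,
}

impl Coalescer {
  async fn load(
    &self,
    key: &str,
    fetch: impl FnOnce() -> BoxFuture<'static, LoadResult>,
  ) -> LoadResult {
    let future = {
      let mut pending = self.pending.lock().unwrap();
      if let Some(existing) = pending.get(key) {
        // someone is already loading this key; await their future
        existing.clone()
      } else {
        let future = fetch().shared();
        pending.insert(key.to_string(), future.clone());
        future
      }
    };
    // every caller polls the same Shared future, so the fetch runs once;
    // the real code additionally rewrites the map entry once the future
    // settles (FsCached / MemoryCached / Errored / Cached) so later calls
    // skip the future entirely
    future.await
  }
}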
+#[derive(Debug)] +pub struct TarballCache { + cache: Arc, + fs: Arc, + npmrc: Arc, + progress_bar: ProgressBar, + memory_cache: Mutex>, +} + +impl TarballCache { + pub fn new( + cache: Arc, + fs: Arc, + npmrc: Arc, + progress_bar: ProgressBar, + ) -> Self { + Self { + cache, + fs, + npmrc, + progress_bar, + memory_cache: Default::default(), + } + } + + pub async fn ensure_package( + &self, + package: &PackageNv, + dist: &NpmPackageVersionDistInfo, + // it's not safe to share these across runtimes + http_client_for_runtime: &Arc, + ) -> Result<(), AnyError> { + self + .ensure_package_inner(package, dist, http_client_for_runtime) + .await + .with_context(|| format!("Failed caching npm package '{}'.", package)) + } + + async fn ensure_package_inner( + &self, + package_nv: &PackageNv, + dist: &NpmPackageVersionDistInfo, + http_client_for_runtime: &Arc, + ) -> Result<(), AnyError> { + let (created, cache_item) = { + let mut mem_cache = self.memory_cache.lock(); + if let Some(cache_item) = mem_cache.get(package_nv) { + (false, cache_item.clone()) + } else { + let future = self.create_setup_future( + package_nv.clone(), + dist.clone(), + http_client_for_runtime.clone(), + ); + let cache_item = MemoryCacheItem::PendingFuture(future); + mem_cache.insert(package_nv.clone(), cache_item.clone()); + (true, cache_item) + } + }; + + match cache_item { + MemoryCacheItem::Cached => Ok(()), + MemoryCacheItem::Errored(err) => Err(anyhow!("{}", err)), + MemoryCacheItem::PendingFuture(future) => { + if created { + match future.await { + Ok(_) => { + *self.memory_cache.lock().get_mut(package_nv).unwrap() = + MemoryCacheItem::Cached; + Ok(()) + } + Err(err) => { + let result_err = anyhow!("{}", err); + *self.memory_cache.lock().get_mut(package_nv).unwrap() = + MemoryCacheItem::Errored(err); + Err(result_err) + } + } + } else { + future.await.map_err(|err| anyhow!("{}", err)) + } + } + } + } + + fn create_setup_future( + &self, + package_nv: PackageNv, + dist: NpmPackageVersionDistInfo, + http_client_for_runtime: Arc, + ) -> Shared>>> { + let registry_url = self.npmrc.get_registry_url(&package_nv.name); + let registry_config = + self.npmrc.get_registry_config(&package_nv.name).clone(); + + let cache = self.cache.clone(); + let fs = self.fs.clone(); + let progress_bar = self.progress_bar.clone(); + let package_folder = + cache.package_folder_for_nv_and_url(&package_nv, registry_url); + + deno_core::unsync::spawn(async move { + let should_use_cache = cache.should_use_cache_for_package(&package_nv); + let package_folder_exists = fs.exists_sync(&package_folder); + if should_use_cache && package_folder_exists { + return Ok(()); + } else if cache.cache_setting() == &CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{}\", --cached-only is specified.", + &package_nv.name + ) + ) + ); + } + + if dist.tarball.is_empty() { + bail!("Tarball URL was empty."); + } + + let maybe_auth_header = + maybe_auth_header_for_npm_registry(®istry_config); + + let guard = progress_bar.update(&dist.tarball); + let maybe_bytes = http_client_for_runtime + .download_with_progress(&dist.tarball, maybe_auth_header, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + let extraction_mode = if should_use_cache || !package_folder_exists { + TarballExtractionMode::SiblingTempDir + } else { + // The user ran with `--reload`, so overwrite the package instead of + // deleting it since the package might get corrupted if a user kills + // their deno process while it's deleting a 
package directory + // + // We can't rename this folder and delete it because the folder + // may be in use by another process or may now contain hardlinks, + // which will cause windows to throw an "AccessDenied" error when + // renaming. So we settle for overwriting. + TarballExtractionMode::Overwrite + }; + let dist = dist.clone(); + let package_nv = package_nv.clone(); + deno_core::unsync::spawn_blocking(move || { + verify_and_extract_tarball( + &package_nv, + &bytes, + &dist, + &package_folder, + extraction_mode, + ) + }) + .await? + } + None => { + bail!("Could not find npm package tarball at: {}", dist.tarball); + } + } + }) + .map(|result| result.unwrap().map_err(Arc::new)) + .boxed() + .shared() + } +} diff --git a/cli/npm/managed/cache/tarball_extract.rs b/cli/npm/managed/cache/tarball_extract.rs new file mode 100644 index 000000000..e2d242e66 --- /dev/null +++ b/cli/npm/managed/cache/tarball_extract.rs @@ -0,0 +1,324 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashSet; +use std::fs; +use std::io::ErrorKind; +use std::path::Path; +use std::path::PathBuf; + +use base64::prelude::BASE64_STANDARD; +use base64::Engine; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::AnyError; +use deno_npm::registry::NpmPackageVersionDistInfo; +use deno_npm::registry::NpmPackageVersionDistInfoIntegrity; +use deno_semver::package::PackageNv; +use flate2::read::GzDecoder; +use tar::Archive; +use tar::EntryType; + +use crate::util::path::get_atomic_dir_path; + +#[derive(Debug, Copy, Clone)] +pub enum TarballExtractionMode { + /// Overwrites the destination directory without deleting any files. + Overwrite, + /// Creates and writes to a sibling temporary directory. When done, moves + /// it to the final destination. + /// + /// This is more robust than `Overwrite` as it better handles multiple + /// processes writing to the directory at the same time. + SiblingTempDir, +} + +pub fn verify_and_extract_tarball( + package_nv: &PackageNv, + data: &[u8], + dist_info: &NpmPackageVersionDistInfo, + output_folder: &Path, + extraction_mode: TarballExtractionMode, +) -> Result<(), AnyError> { + verify_tarball_integrity(package_nv, data, &dist_info.integrity())?; + + match extraction_mode { + TarballExtractionMode::Overwrite => extract_tarball(data, output_folder), + TarballExtractionMode::SiblingTempDir => { + let temp_dir = get_atomic_dir_path(output_folder); + extract_tarball(data, &temp_dir)?; + rename_with_retries(&temp_dir, output_folder) + .map_err(AnyError::from) + .context("Failed moving extracted tarball to final destination.") + } + } +} + +fn rename_with_retries( + temp_dir: &Path, + output_folder: &Path, +) -> Result<(), std::io::Error> { + fn already_exists(err: &std::io::Error, output_folder: &Path) -> bool { + // Windows will do an "Access is denied" error + err.kind() == ErrorKind::AlreadyExists || output_folder.exists() + } + + let mut count = 0; + // renaming might be flaky if a lot of processes are trying + // to do this, so retry a few times + loop { + match fs::rename(temp_dir, output_folder) { + Ok(_) => return Ok(()), + Err(err) if already_exists(&err, output_folder) => { + // another process copied here, just cleanup + let _ = fs::remove_dir_all(temp_dir); + return Ok(()); + } + Err(err) => { + count += 1; + if count > 5 { + // too many retries, cleanup and return the error + let _ = fs::remove_dir_all(temp_dir); + return Err(err); + } + + // wait a bit before retrying... 
this should be very rare or only + // in error cases, so ok to sleep a bit + let sleep_ms = std::cmp::min(100, 20 * count); + std::thread::sleep(std::time::Duration::from_millis(sleep_ms)); + } + } + } +} + +fn verify_tarball_integrity( + package: &PackageNv, + data: &[u8], + npm_integrity: &NpmPackageVersionDistInfoIntegrity, +) -> Result<(), AnyError> { + use ring::digest::Context; + let (tarball_checksum, expected_checksum) = match npm_integrity { + NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm, + base64_hash, + } => { + let algo = match *algorithm { + "sha512" => &ring::digest::SHA512, + "sha1" => &ring::digest::SHA1_FOR_LEGACY_USE_ONLY, + hash_kind => bail!( + "Not implemented hash function for {}: {}", + package, + hash_kind + ), + }; + let mut hash_ctx = Context::new(algo); + hash_ctx.update(data); + let digest = hash_ctx.finish(); + let tarball_checksum = BASE64_STANDARD.encode(digest.as_ref()); + (tarball_checksum, base64_hash) + } + NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(hex) => { + let mut hash_ctx = Context::new(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY); + hash_ctx.update(data); + let digest = hash_ctx.finish(); + let tarball_checksum = faster_hex::hex_string(digest.as_ref()); + (tarball_checksum, hex) + } + NpmPackageVersionDistInfoIntegrity::UnknownIntegrity(integrity) => { + bail!( + "Not implemented integrity kind for {}: {}", + package, + integrity + ) + } + }; + + if tarball_checksum != *expected_checksum { + bail!( + "Tarball checksum did not match what was provided by npm registry for {}.\n\nExpected: {}\nActual: {}", + package, + expected_checksum, + tarball_checksum, + ) + } + Ok(()) +} + +fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> { + fs::create_dir_all(output_folder)?; + let output_folder = fs::canonicalize(output_folder)?; + let tar = GzDecoder::new(data); + let mut archive = Archive::new(tar); + archive.set_overwrite(true); + archive.set_preserve_permissions(true); + let mut created_dirs = HashSet::new(); + + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + let entry_type = entry.header().entry_type(); + + // Some package tarballs contain "pax_global_header", these entries + // should be skipped. + if entry_type == EntryType::XGlobalHeader { + continue; + } + + // skip the first component which will be either "package" or the name of the package + let relative_path = path.components().skip(1).collect::(); + let absolute_path = output_folder.join(relative_path); + let dir_path = if entry_type == EntryType::Directory { + absolute_path.as_path() + } else { + absolute_path.parent().unwrap() + }; + if created_dirs.insert(dir_path.to_path_buf()) { + fs::create_dir_all(dir_path)?; + let canonicalized_dir = fs::canonicalize(dir_path)?; + if !canonicalized_dir.starts_with(&output_folder) { + bail!( + "Extracted directory '{}' of npm tarball was not in output directory.", + canonicalized_dir.display() + ) + } + } + + let entry_type = entry.header().entry_type(); + match entry_type { + EntryType::Regular => { + entry.unpack(&absolute_path)?; + } + EntryType::Symlink | EntryType::Link => { + // At the moment, npm doesn't seem to support uploading hardlinks or + // symlinks to the npm registry. If ever adding symlink or hardlink + // support, we will need to validate that the hardlink and symlink + // target are within the package directory. 
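If symlink or hardlink entries were ever supported, the containment check described above could mirror the directory check earlier in this function. A rough sketch; `link_target_is_contained` is hypothetical, and a real implementation would need to normalize targets lexically since `canonicalize` fails for targets that have not been extracted yet:

fn link_target_is_contained(
  output_folder: &Path, // already canonicalized near the top of extract_tarball
  entry_parent: &Path,  // directory containing the link entry
  link_target: &Path,   // target path recorded in the tar header
) -> std::io::Result<bool> {
  // resolve the target the way the OS would: relative to the entry's directory
  let resolved = std::fs::canonicalize(entry_parent.join(link_target))?;
  Ok(resolved.starts_with(output_folder))
}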
+ log::warn!( + "Ignoring npm tarball entry type {:?} for '{}'", + entry_type, + absolute_path.display() + ) + } + _ => { + // ignore + } + } + } + Ok(()) +} + +#[cfg(test)] +mod test { + use deno_semver::Version; + use test_util::TempDir; + + use super::*; + + #[test] + pub fn test_verify_tarball() { + let package = PackageNv { + name: "package".to_string(), + version: Version::parse_from_npm("1.0.0").unwrap(), + }; + let actual_checksum = + "z4PhNX7vuL3xVChQ1m2AB9Yg5AULVxXcg/SpIdNs6c5H0NE8XYXysP+DGNKHfuwvY7kxvUdBeoGlODJ6+SfaPg=="; + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::UnknownIntegrity("test") + ) + .unwrap_err() + .to_string(), + "Not implemented integrity kind for package@1.0.0: test", + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "notimplemented", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + "Not implemented hash function for package@1.0.0: notimplemented", + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha1", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + concat!( + "Tarball checksum did not match what was provided by npm ", + "registry for package@1.0.0.\n\nExpected: test\nActual: 2jmj7l5rSw0yVb/vlWAYkK/YBwk=", + ), + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha512", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_checksum}"), + ); + assert!(verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha512", + base64_hash: actual_checksum, + }, + ) + .is_ok()); + let actual_hex = "da39a3ee5e6b4b0d3255bfef95601890afd80709"; + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex("test"), + ) + .unwrap_err() + .to_string(), + format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_hex}"), + ); + assert!(verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(actual_hex), + ) + .is_ok()); + } + + #[test] + fn rename_with_retries_succeeds_exists() { + let temp_dir = TempDir::new(); + let folder_1 = temp_dir.path().join("folder_1"); + let folder_2 = temp_dir.path().join("folder_2"); + + folder_1.create_dir_all(); + folder_1.join("a.txt").write("test"); + folder_2.create_dir_all(); + // this will not end up in the output as rename_with_retries assumes + // the folders ending up at the destination are the same + folder_2.join("b.txt").write("test2"); + + let dest_folder = temp_dir.path().join("dest_folder"); + + rename_with_retries(folder_1.as_path(), dest_folder.as_path()).unwrap(); + rename_with_retries(folder_2.as_path(), dest_folder.as_path()).unwrap(); + assert!(dest_folder.join("a.txt").exists()); + assert!(!dest_folder.join("b.txt").exists()); + } +} diff --git a/cli/npm/managed/mod.rs b/cli/npm/managed/mod.rs index 95d4dca0f..718806ced 100644 --- a/cli/npm/managed/mod.rs +++ b/cli/npm/managed/mod.rs @@ -4,6 +4,7 @@ use std::path::Path; use std::path::PathBuf; use std::sync::Arc; +use cache::RegistryInfoDownloader; use 
deno_ast::ModuleSpecifier; use deno_core::anyhow::Context; use deno_core::error::AnyError; @@ -30,6 +31,7 @@ use crate::args::NpmProcessState; use crate::args::NpmProcessStateKind; use crate::args::PackageJsonDepsProvider; use crate::cache::FastInsecureHasher; +use crate::http_util::HttpClient; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; use crate::util::progress_bar::ProgressBar; @@ -49,7 +51,6 @@ mod installer; mod registry; mod resolution; mod resolvers; -mod tarball; pub enum CliNpmResolverManagedSnapshotOption { ResolveFromLockfile(Arc>), @@ -88,15 +89,17 @@ pub async fn create_managed_npm_resolver_for_lsp( } }; create_inner( - npm_cache, - npm_api, - snapshot, - options.maybe_lockfile, options.fs, + options.http_client, + options.maybe_lockfile, + npm_api, + npm_cache, + options.npmrc, + options.package_json_installer, options.text_only_progress_bar, options.maybe_node_modules_path, - options.package_json_installer, options.npm_system_info, + snapshot, ) } @@ -107,38 +110,43 @@ pub async fn create_managed_npm_resolver( let npm_api = create_api(&options, npm_cache.clone()); let snapshot = resolve_snapshot(&npm_api, options.snapshot).await?; Ok(create_inner( - npm_cache, - npm_api, - snapshot, - options.maybe_lockfile, options.fs, + options.http_client, + options.maybe_lockfile, + npm_api, + npm_cache, + options.npmrc, + options.package_json_installer, options.text_only_progress_bar, options.maybe_node_modules_path, - options.package_json_installer, options.npm_system_info, + snapshot, )) } #[allow(clippy::too_many_arguments)] fn create_inner( - npm_cache: Arc, - npm_api: Arc, - snapshot: Option, - maybe_lockfile: Option>>, fs: Arc, + http_client: Arc, + maybe_lockfile: Option>>, + npm_api: Arc, + npm_cache: Arc, + npm_rc: Arc, + package_json_installer: CliNpmResolverManagedPackageJsonInstallerOption, text_only_progress_bar: crate::util::progress_bar::ProgressBar, node_modules_dir_path: Option, - package_json_installer: CliNpmResolverManagedPackageJsonInstallerOption, npm_system_info: NpmSystemInfo, + snapshot: Option, ) -> Arc { let resolution = Arc::new(NpmResolution::from_serialized( npm_api.clone(), snapshot, maybe_lockfile.clone(), )); - let npm_fs_resolver = create_npm_fs_resolver( + let fs_resolver = create_npm_fs_resolver( fs.clone(), npm_cache.clone(), + npm_rc.clone(), &text_only_progress_bar, resolution.clone(), node_modules_dir_path, @@ -157,13 +165,15 @@ fn create_inner( } }; Arc::new(ManagedCliNpmResolver::new( - npm_api, fs, - resolution, - npm_fs_resolver, - npm_cache, + fs_resolver, + http_client, maybe_lockfile, + npm_api, + npm_cache, + npm_rc, package_json_deps_installer, + resolution, text_only_progress_bar, npm_system_info, )) @@ -176,9 +186,6 @@ fn create_cache(options: &CliNpmResolverManagedCreateOptions) -> Arc { options.npmrc.get_all_known_registries_urls(), ), options.cache_setting.clone(), - options.fs.clone(), - options.http_client.clone(), - options.text_only_progress_bar.clone(), options.npmrc.clone(), )) } @@ -190,8 +197,11 @@ fn create_api( Arc::new(CliNpmRegistryApi::new( npm_cache.clone(), options.http_client.clone(), - options.npmrc.clone(), - options.text_only_progress_bar.clone(), + RegistryInfoDownloader::new( + npm_cache, + options.npmrc.clone(), + options.text_only_progress_bar.clone(), + ), )) } @@ -210,8 +220,6 @@ async fn resolve_snapshot( lockfile.lock().filename.display() ) })?; - // clear the memory cache to reduce memory usage - api.clear_memory_cache(); Ok(Some(snapshot)) } else { Ok(None) @@ -246,15 +254,17 @@ async 
fn snapshot_from_lockfile( /// An npm resolver where the resolution is managed by Deno rather than /// the user bringing their own node_modules (BYONM) on the file system. pub struct ManagedCliNpmResolver { - api: Arc, fs: Arc, fs_resolver: Arc, - global_npm_cache: Arc, - resolution: Arc, + http_client: Arc, maybe_lockfile: Option>>, - npm_system_info: NpmSystemInfo, - progress_bar: ProgressBar, + npm_api: Arc, + npm_cache: Arc, + npm_rc: Arc, package_json_deps_installer: Arc, + resolution: Arc, + text_only_progress_bar: ProgressBar, + npm_system_info: NpmSystemInfo, } impl std::fmt::Debug for ManagedCliNpmResolver { @@ -268,25 +278,29 @@ impl std::fmt::Debug for ManagedCliNpmResolver { impl ManagedCliNpmResolver { #[allow(clippy::too_many_arguments)] pub fn new( - api: Arc, fs: Arc, - resolution: Arc, fs_resolver: Arc, - global_npm_cache: Arc, + http_client: Arc, maybe_lockfile: Option>>, + npm_api: Arc, + npm_cache: Arc, + npm_rc: Arc, package_json_deps_installer: Arc, - progress_bar: ProgressBar, + resolution: Arc, + text_only_progress_bar: ProgressBar, npm_system_info: NpmSystemInfo, ) -> Self { Self { - api, fs, fs_resolver, - global_npm_cache, - resolution, + http_client, maybe_lockfile, + npm_api, + npm_cache, + npm_rc, package_json_deps_installer, - progress_bar, + text_only_progress_bar, + resolution, npm_system_info, } } @@ -367,7 +381,7 @@ impl ManagedCliNpmResolver { } self.resolution.add_package_reqs(packages).await?; - self.fs_resolver.cache_packages().await?; + self.fs_resolver.cache_packages(&self.http_client).await?; // If there's a lock file, update it with all discovered npm packages if let Some(lockfile) = &self.maybe_lockfile { @@ -410,7 +424,7 @@ impl ManagedCliNpmResolver { // add and ensure this isn't added to the lockfile let package_reqs = vec![PackageReq::from_str("@types/node").unwrap()]; self.resolution.add_package_reqs(&package_reqs).await?; - self.fs_resolver.cache_packages().await?; + self.cache_packages().await?; Ok(()) } @@ -421,7 +435,7 @@ impl ManagedCliNpmResolver { } pub async fn cache_packages(&self) -> Result<(), AnyError> { - self.fs_resolver.cache_packages().await + self.fs_resolver.cache_packages(&self.http_client).await } /// Resolves a package requirement for deno graph. This should only be @@ -435,7 +449,7 @@ impl ManagedCliNpmResolver { match result { Ok(nv) => NpmPackageReqResolution::Ok(nv), Err(err) => { - if self.api.mark_force_reload() { + if self.npm_api.mark_force_reload() { log::debug!("Restarting npm specifier resolution to check for new registry information. 
Error: {:#}", err); NpmPackageReqResolution::ReloadRegistryInfo(err.into()) } else { @@ -475,7 +489,7 @@ impl ManagedCliNpmResolver { ) -> Result<(), AnyError> { // this will internally cache the package information self - .api + .npm_api .package_info(package_name) .await .map(|_| ()) @@ -483,7 +497,7 @@ impl ManagedCliNpmResolver { } pub fn global_cache_root_folder(&self) -> PathBuf { - self.global_npm_cache.root_folder() + self.npm_cache.root_folder() } } @@ -543,27 +557,30 @@ impl CliNpmResolver for ManagedCliNpmResolver { fn clone_snapshotted(&self) -> Arc { // create a new snapshotted npm resolution and resolver let npm_resolution = Arc::new(NpmResolution::new( - self.api.clone(), + self.npm_api.clone(), self.resolution.snapshot(), self.maybe_lockfile.clone(), )); Arc::new(ManagedCliNpmResolver::new( - self.api.clone(), self.fs.clone(), - npm_resolution.clone(), create_npm_fs_resolver( self.fs.clone(), - self.global_npm_cache.clone(), - &self.progress_bar, - npm_resolution, + self.npm_cache.clone(), + self.npm_rc.clone(), + &self.text_only_progress_bar, + npm_resolution.clone(), self.root_node_modules_path().map(ToOwned::to_owned), self.npm_system_info.clone(), ), - self.global_npm_cache.clone(), + self.http_client.clone(), self.maybe_lockfile.clone(), + self.npm_api.clone(), + self.npm_cache.clone(), + self.npm_rc.clone(), self.package_json_deps_installer.clone(), - self.progress_bar.clone(), + npm_resolution, + self.text_only_progress_bar.clone(), self.npm_system_info.clone(), )) } diff --git a/cli/npm/managed/registry.rs b/cli/npm/managed/registry.rs index 861ce2a4b..364529ed2 100644 --- a/cli/npm/managed/registry.rs +++ b/cli/npm/managed/registry.rs @@ -2,38 +2,27 @@ use std::collections::HashMap; use std::collections::HashSet; -use std::fs; -use std::io::ErrorKind; -use std::path::PathBuf; use std::sync::Arc; use async_trait::async_trait; use deno_core::anyhow::anyhow; -use deno_core::anyhow::Context; -use deno_core::error::custom_error; use deno_core::error::AnyError; use deno_core::futures::future::BoxFuture; use deno_core::futures::future::Shared; use deno_core::futures::FutureExt; use deno_core::parking_lot::Mutex; -use deno_core::serde_json; -use deno_core::url::Url; -use deno_npm::npm_rc::RegistryConfig; -use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::registry::NpmPackageInfo; use deno_npm::registry::NpmRegistryApi; use deno_npm::registry::NpmRegistryPackageInfoLoadError; use crate::args::CacheSetting; -use crate::cache::CACHE_PERM; use crate::http_util::HttpClient; -use crate::npm::common::maybe_auth_header_for_npm_registry; -use crate::util::fs::atomic_write_file; -use crate::util::progress_bar::ProgressBar; use crate::util::sync::AtomicFlag; use super::cache::NpmCache; +use super::cache::RegistryInfoDownloader; +// todo(dsherret): make this per worker and make HttpClient an Rc #[derive(Debug)] pub struct CliNpmRegistryApi(Option>); @@ -41,17 +30,15 @@ impl CliNpmRegistryApi { pub fn new( cache: Arc, http_client: Arc, - npmrc: Arc, - progress_bar: ProgressBar, + registry_info_downloader: RegistryInfoDownloader, ) -> Self { Self(Some(Arc::new(CliNpmRegistryApiInner { cache, force_reload_flag: Default::default(), mem_cache: Default::default(), previously_reloaded_packages: Default::default(), - npmrc, http_client, - progress_bar, + registry_info_downloader, }))) } @@ -125,8 +112,7 @@ struct CliNpmRegistryApiInner { mem_cache: Mutex>, previously_reloaded_packages: Mutex>, http_client: Arc, - npmrc: Arc, - progress_bar: ProgressBar, + registry_info_downloader: 
RegistryInfoDownloader, } impl CliNpmRegistryApiInner { @@ -157,10 +143,9 @@ impl CliNpmRegistryApiInner { return Ok(result); } } - api - .load_package_info_from_registry(&name) + api.registry_info_downloader + .load_package_info(&name, &api.http_client) .await - .map(|info| info.map(Arc::new)) .map_err(Arc::new) } .boxed() @@ -202,7 +187,14 @@ impl CliNpmRegistryApiInner { &self, name: &str, ) -> Option { - match self.load_file_cached_package_info_result(name).await { + let result = deno_core::unsync::spawn_blocking({ + let cache = self.cache.clone(); + let name = name.to_string(); + move || cache.load_package_info(&name) + }) + .await + .unwrap(); + match result { Ok(value) => value, Err(err) => { if cfg!(debug_assertions) { @@ -214,149 +206,6 @@ impl CliNpmRegistryApiInner { } } - async fn load_file_cached_package_info_result( - &self, - name: &str, - ) -> Result, AnyError> { - let file_cache_path = self.get_package_file_cache_path(name); - let deserialization_result = deno_core::unsync::spawn_blocking(|| { - let file_text = match fs::read_to_string(file_cache_path) { - Ok(file_text) => file_text, - Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), - Err(err) => return Err(err.into()), - }; - serde_json::from_str(&file_text) - .map(Some) - .map_err(AnyError::from) - }) - .await - .unwrap(); - match deserialization_result { - Ok(maybe_package_info) => Ok(maybe_package_info), - Err(err) => { - // This scenario might mean we need to load more data from the - // npm registry than before. So, just debug log while in debug - // rather than panic. - log::debug!( - "error deserializing registry.json for '{}'. Reloading. {:?}", - name, - err - ); - Ok(None) - } - } - } - - fn save_package_info_to_file_cache( - &self, - name: &str, - package_info: &NpmPackageInfo, - ) { - if let Err(err) = - self.save_package_info_to_file_cache_result(name, package_info) - { - if cfg!(debug_assertions) { - panic!("error saving cached npm package info for {name}: {err:#}"); - } - } - } - - fn save_package_info_to_file_cache_result( - &self, - name: &str, - package_info: &NpmPackageInfo, - ) -> Result<(), AnyError> { - let file_cache_path = self.get_package_file_cache_path(name); - let file_text = serde_json::to_string(&package_info)?; - atomic_write_file(&file_cache_path, file_text, CACHE_PERM)?; - Ok(()) - } - - async fn load_package_info_from_registry( - &self, - name: &str, - ) -> Result, AnyError> { - let registry_url = self.npmrc.get_registry_url(name); - let registry_config = self.npmrc.get_registry_config(name); - - self - .load_package_info_from_registry_inner( - name, - registry_url, - registry_config, - ) - .await - .with_context(|| { - format!( - "Error getting response at {} for package \"{}\"", - self.get_package_url(name, registry_url), - name - ) - }) - } - - async fn load_package_info_from_registry_inner( - &self, - name: &str, - registry_url: &Url, - registry_config: &RegistryConfig, - ) -> Result, AnyError> { - if *self.cache.cache_setting() == CacheSetting::Only { - return Err(custom_error( - "NotCached", - format!( - "An npm specifier not found in cache: \"{name}\", --cached-only is specified." 
- ) - )); - } - - let package_url = self.get_package_url(name, registry_url); - let guard = self.progress_bar.update(package_url.as_str()); - - let maybe_auth_header = maybe_auth_header_for_npm_registry(registry_config); - - let maybe_bytes = self - .http_client - .download_with_progress(package_url, maybe_auth_header, &guard) - .await?; - match maybe_bytes { - Some(bytes) => { - let package_info = deno_core::unsync::spawn_blocking(move || { - serde_json::from_slice(&bytes) - }) - .await??; - self.save_package_info_to_file_cache(name, &package_info); - Ok(Some(package_info)) - } - None => Ok(None), - } - } - - fn get_package_url(&self, name: &str, registry_url: &Url) -> Url { - // list of all characters used in npm packages: - // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ - const ASCII_SET: percent_encoding::AsciiSet = - percent_encoding::NON_ALPHANUMERIC - .remove(b'!') - .remove(b'\'') - .remove(b'(') - .remove(b')') - .remove(b'*') - .remove(b'-') - .remove(b'.') - .remove(b'/') - .remove(b'@') - .remove(b'_') - .remove(b'~'); - let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); - registry_url.join(&name.to_string()).unwrap() - } - - fn get_package_file_cache_path(&self, name: &str) -> PathBuf { - let name_folder_path = self.cache.package_name_folder(name); - name_folder_path.join("registry.json") - } - fn clear_memory_cache(&self) { self.mem_cache.lock().clear(); } diff --git a/cli/npm/managed/resolvers/common.rs b/cli/npm/managed/resolvers/common.rs index b010bdd7c..2d540accd 100644 --- a/cli/npm/managed/resolvers/common.rs +++ b/cli/npm/managed/resolvers/common.rs @@ -12,7 +12,7 @@ use deno_ast::ModuleSpecifier; use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_core::futures; -use deno_core::unsync::spawn; +use deno_core::futures::StreamExt; use deno_core::url::Url; use deno_npm::NpmPackageCacheFolderId; use deno_npm::NpmPackageId; @@ -21,7 +21,8 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; -use super::super::cache::NpmCache; +use crate::http_util::HttpClient; +use crate::npm::managed::cache::TarballCache; /// Part of the resolution that interacts with the file system. #[async_trait] @@ -49,7 +50,10 @@ pub trait NpmPackageFsResolver: Send + Sync { specifier: &ModuleSpecifier, ) -> Result, AnyError>; - async fn cache_packages(&self) -> Result<(), AnyError>; + async fn cache_packages( + &self, + http_client: &Arc, + ) -> Result<(), AnyError>; fn ensure_read_permission( &self, @@ -126,20 +130,20 @@ impl RegistryReadPermissionChecker { /// Caches all the packages in parallel. 
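The hunk below replaces one spawned task per package plus `join_all` with a `FuturesUnordered`: the downloads still run concurrently, but results are drained in completion order, so the first failure short-circuits instead of waiting for every remaining download. A self-contained sketch of that shape; `fetch_one` is a stand-in for `tarball_cache.ensure_package`:

use futures::stream::{FuturesUnordered, StreamExt};

async fn cache_all<F, Fut>(ids: Vec<u64>, fetch_one: F) -> anyhow::Result<()>
where
  F: Fn(u64) -> Fut,
  Fut: std::future::Future<Output = anyhow::Result<()>>,
{
  let mut futures_unordered = FuturesUnordered::new();
  for id in ids {
    // unlike spawn + join_all, nothing moves to another task; the stream
    // polls all pushed futures concurrently on the current task
    futures_unordered.push(fetch_one(id));
  }
  while let Some(result) = futures_unordered.next().await {
    // results arrive in completion order, so the first error returns
    // immediately and drops (cancels) the remaining futures
    result?;
  }
  Ok(())
}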
pub async fn cache_packages( packages: Vec, - cache: &Arc, + tarball_cache: &Arc, + http_client: &Arc, ) -> Result<(), AnyError> { - let mut handles = Vec::with_capacity(packages.len()); + let mut futures_unordered = futures::stream::FuturesUnordered::new(); for package in packages { - let cache = cache.clone(); - let handle = spawn(async move { - cache.ensure_package(&package.id.nv, &package.dist).await + futures_unordered.push(async move { + tarball_cache + .ensure_package(&package.id.nv, &package.dist, http_client) + .await }); - handles.push(handle); } - let results = futures::future::join_all(handles).await; - for result in results { + while let Some(result) = futures_unordered.next().await { // surface the first error - result??; + result?; } Ok(()) } diff --git a/cli/npm/managed/resolvers/global.rs b/cli/npm/managed/resolvers/global.rs index cfc57e591..4ffcb251f 100644 --- a/cli/npm/managed/resolvers/global.rs +++ b/cli/npm/managed/resolvers/global.rs @@ -20,8 +20,11 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; +use crate::http_util::HttpClient; + use super::super::super::common::types_package_name; use super::super::cache::NpmCache; +use super::super::cache::TarballCache; use super::super::resolution::NpmResolution; use super::common::cache_packages; use super::common::NpmPackageFsResolver; @@ -31,6 +34,7 @@ use super::common::RegistryReadPermissionChecker; #[derive(Debug)] pub struct GlobalNpmPackageResolver { cache: Arc, + tarball_cache: Arc, resolution: Arc, system_info: NpmSystemInfo, registry_read_permission_checker: RegistryReadPermissionChecker, @@ -38,19 +42,21 @@ pub struct GlobalNpmPackageResolver { impl GlobalNpmPackageResolver { pub fn new( - fs: Arc, cache: Arc, + fs: Arc, + tarball_cache: Arc, resolution: Arc, system_info: NpmSystemInfo, ) -> Self { Self { - cache: cache.clone(), - resolution, - system_info, registry_read_permission_checker: RegistryReadPermissionChecker::new( fs, cache.root_folder(), ), + cache, + tarball_cache, + resolution, + system_info, } } @@ -123,12 +129,20 @@ impl NpmPackageFsResolver for GlobalNpmPackageResolver { ) } - async fn cache_packages(&self) -> Result<(), AnyError> { + async fn cache_packages( + &self, + http_client: &Arc, + ) -> Result<(), AnyError> { let package_partitions = self .resolution .all_system_packages_partitioned(&self.system_info); - cache_packages(package_partitions.packages, &self.cache).await?; + cache_packages( + package_partitions.packages, + &self.tarball_cache, + http_client, + ) + .await?; // create the copy package folders for copy in package_partitions.copy_packages { diff --git a/cli/npm/managed/resolvers/local.rs b/cli/npm/managed/resolvers/local.rs index f0c2a3f65..5c3b1f15e 100644 --- a/cli/npm/managed/resolvers/local.rs +++ b/cli/npm/managed/resolvers/local.rs @@ -14,6 +14,7 @@ use std::path::PathBuf; use std::sync::Arc; use crate::cache::CACHE_PERM; +use crate::http_util::HttpClient; use crate::npm::cache_dir::mixed_case_package_name_decode; use crate::util::fs::atomic_write_file; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; @@ -27,16 +28,15 @@ use deno_ast::ModuleSpecifier; use deno_core::anyhow::bail; use deno_core::anyhow::Context; use deno_core::error::AnyError; +use deno_core::futures::stream::FuturesUnordered; +use deno_core::futures::StreamExt; use deno_core::parking_lot::Mutex; -use deno_core::unsync::spawn; -use deno_core::unsync::JoinHandle; use deno_core::url::Url; use 
deno_npm::resolution::NpmResolutionSnapshot; use deno_npm::NpmPackageCacheFolderId; use deno_npm::NpmPackageId; use deno_npm::NpmResolutionPackage; use deno_npm::NpmSystemInfo; -use deno_runtime::deno_core::futures; use deno_runtime::deno_fs; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; @@ -48,6 +48,7 @@ use crate::npm::cache_dir::mixed_case_package_name_encode; use super::super::super::common::types_package_name; use super::super::cache::NpmCache; +use super::super::cache::TarballCache; use super::super::resolution::NpmResolution; use super::common::NpmPackageFsResolver; use super::common::RegistryReadPermissionChecker; @@ -56,10 +57,11 @@ use super::common::RegistryReadPermissionChecker; /// and resolves packages from it. #[derive(Debug)] pub struct LocalNpmPackageResolver { - fs: Arc, cache: Arc, + fs: Arc, progress_bar: ProgressBar, resolution: Arc, + tarball_cache: Arc, root_node_modules_path: PathBuf, root_node_modules_url: Url, system_info: NpmSystemInfo, @@ -68,26 +70,28 @@ pub struct LocalNpmPackageResolver { impl LocalNpmPackageResolver { pub fn new( - fs: Arc, cache: Arc, + fs: Arc, progress_bar: ProgressBar, - node_modules_folder: PathBuf, resolution: Arc, + tarball_cache: Arc, + node_modules_folder: PathBuf, system_info: NpmSystemInfo, ) -> Self { Self { - fs: fs.clone(), cache, + fs: fs.clone(), progress_bar, resolution, - root_node_modules_url: Url::from_directory_path(&node_modules_folder) - .unwrap(), - root_node_modules_path: node_modules_folder.clone(), - system_info, + tarball_cache, registry_read_permission_checker: RegistryReadPermissionChecker::new( fs, - node_modules_folder, + node_modules_folder.clone(), ), + root_node_modules_url: Url::from_directory_path(&node_modules_folder) + .unwrap(), + root_node_modules_path: node_modules_folder, + system_info, } } @@ -225,11 +229,16 @@ impl NpmPackageFsResolver for LocalNpmPackageResolver { Ok(get_package_folder_id_from_folder_name(&folder_name)) } - async fn cache_packages(&self) -> Result<(), AnyError> { + async fn cache_packages( + &self, + http_client: &Arc, + ) -> Result<(), AnyError> { sync_resolution_with_fs( &self.resolution.snapshot(), &self.cache, + http_client, &self.progress_bar, + &self.tarball_cache, &self.root_node_modules_path, &self.system_info, ) @@ -251,7 +260,9 @@ impl NpmPackageFsResolver for LocalNpmPackageResolver { async fn sync_resolution_with_fs( snapshot: &NpmResolutionSnapshot, cache: &Arc, + http_client: &Arc, progress_bar: &ProgressBar, + tarball_cache: &Arc, root_node_modules_dir_path: &Path, system_info: &NpmSystemInfo, ) -> Result<(), AnyError> { @@ -288,8 +299,7 @@ async fn sync_resolution_with_fs( // node_modules/.deno//node_modules/ let package_partitions = snapshot.all_system_packages_partitioned(system_info); - let mut handles: Vec>> = - Vec::with_capacity(package_partitions.packages.len()); + let mut cache_futures = FuturesUnordered::new(); let mut newest_packages_by_name: HashMap<&String, &NpmResolutionPackage> = HashMap::with_capacity(package_partitions.packages.len()); let bin_entries = Arc::new(Mutex::new(bin_entries::BinEntries::new())); @@ -317,21 +327,19 @@ async fn sync_resolution_with_fs( // are forced to be recreated setup_cache.remove_dep(&package_folder_name); - let pb = progress_bar.clone(); - let cache = cache.clone(); - let package = package.clone(); let bin_entries_to_setup = bin_entries.clone(); - let handle = spawn(async move { - cache.ensure_package(&package.id.nv, &package.dist).await?; - let pb_guard = 
pb.update_with_prompt( + cache_futures.push(async move { + tarball_cache + .ensure_package(&package.id.nv, &package.dist, http_client) + .await?; + let pb_guard = progress_bar.update_with_prompt( ProgressMessagePrompt::Initialize, &package.id.nv.to_string(), ); let sub_node_modules = folder_path.join("node_modules"); let package_path = join_package_name(&sub_node_modules, &package.id.nv.name); - let cache_folder = - cache.package_folder_for_name_and_version(&package.id.nv); + let cache_folder = cache.package_folder_for_nv(&package.id.nv); deno_core::unsync::spawn_blocking({ let package_path = package_path.clone(); @@ -353,15 +361,13 @@ async fn sync_resolution_with_fs( // finally stop showing the progress bar drop(pb_guard); // explicit for clarity - Ok(()) + Ok::<_, AnyError>(()) }); - handles.push(handle); } } - let results = futures::future::join_all(handles).await; - for result in results { - result??; // surface the first error + while let Some(result) = cache_futures.next().await { + result?; // surface the first error } // 2. Create any "copy" packages, which are used for peer dependencies diff --git a/cli/npm/managed/resolvers/mod.rs b/cli/npm/managed/resolvers/mod.rs index d5472344a..5f0343805 100644 --- a/cli/npm/managed/resolvers/mod.rs +++ b/cli/npm/managed/resolvers/mod.rs @@ -7,6 +7,7 @@ mod local; use std::path::PathBuf; use std::sync::Arc; +use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::NpmSystemInfo; use deno_runtime::deno_fs::FileSystem; @@ -18,28 +19,38 @@ use self::global::GlobalNpmPackageResolver; use self::local::LocalNpmPackageResolver; use super::cache::NpmCache; +use super::cache::TarballCache; use super::resolution::NpmResolution; pub fn create_npm_fs_resolver( fs: Arc, - cache: Arc, + npm_cache: Arc, + npm_rc: Arc, progress_bar: &ProgressBar, resolution: Arc, maybe_node_modules_path: Option, system_info: NpmSystemInfo, ) -> Arc { + let tarball_cache = Arc::new(TarballCache::new( + npm_cache.clone(), + fs.clone(), + npm_rc, + progress_bar.clone(), + )); match maybe_node_modules_path { Some(node_modules_folder) => Arc::new(LocalNpmPackageResolver::new( + npm_cache, fs, - cache, progress_bar.clone(), - node_modules_folder, resolution, + tarball_cache, + node_modules_folder, system_info, )), None => Arc::new(GlobalNpmPackageResolver::new( + npm_cache, fs, - cache, + tarball_cache, resolution, system_info, )), diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs deleted file mode 100644 index e2d242e66..000000000 --- a/cli/npm/managed/tarball.rs +++ /dev/null @@ -1,324 +0,0 @@ -// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. - -use std::collections::HashSet; -use std::fs; -use std::io::ErrorKind; -use std::path::Path; -use std::path::PathBuf; - -use base64::prelude::BASE64_STANDARD; -use base64::Engine; -use deno_core::anyhow::bail; -use deno_core::anyhow::Context; -use deno_core::error::AnyError; -use deno_npm::registry::NpmPackageVersionDistInfo; -use deno_npm::registry::NpmPackageVersionDistInfoIntegrity; -use deno_semver::package::PackageNv; -use flate2::read::GzDecoder; -use tar::Archive; -use tar::EntryType; - -use crate::util::path::get_atomic_dir_path; - -#[derive(Debug, Copy, Clone)] -pub enum TarballExtractionMode { - /// Overwrites the destination directory without deleting any files. - Overwrite, - /// Creates and writes to a sibling temporary directory. When done, moves - /// it to the final destination. 
- /// - /// This is more robust than `Overwrite` as it better handles multiple - /// processes writing to the directory at the same time. - SiblingTempDir, -} - -pub fn verify_and_extract_tarball( - package_nv: &PackageNv, - data: &[u8], - dist_info: &NpmPackageVersionDistInfo, - output_folder: &Path, - extraction_mode: TarballExtractionMode, -) -> Result<(), AnyError> { - verify_tarball_integrity(package_nv, data, &dist_info.integrity())?; - - match extraction_mode { - TarballExtractionMode::Overwrite => extract_tarball(data, output_folder), - TarballExtractionMode::SiblingTempDir => { - let temp_dir = get_atomic_dir_path(output_folder); - extract_tarball(data, &temp_dir)?; - rename_with_retries(&temp_dir, output_folder) - .map_err(AnyError::from) - .context("Failed moving extracted tarball to final destination.") - } - } -} - -fn rename_with_retries( - temp_dir: &Path, - output_folder: &Path, -) -> Result<(), std::io::Error> { - fn already_exists(err: &std::io::Error, output_folder: &Path) -> bool { - // Windows will do an "Access is denied" error - err.kind() == ErrorKind::AlreadyExists || output_folder.exists() - } - - let mut count = 0; - // renaming might be flaky if a lot of processes are trying - // to do this, so retry a few times - loop { - match fs::rename(temp_dir, output_folder) { - Ok(_) => return Ok(()), - Err(err) if already_exists(&err, output_folder) => { - // another process copied here, just cleanup - let _ = fs::remove_dir_all(temp_dir); - return Ok(()); - } - Err(err) => { - count += 1; - if count > 5 { - // too many retries, cleanup and return the error - let _ = fs::remove_dir_all(temp_dir); - return Err(err); - } - - // wait a bit before retrying... this should be very rare or only - // in error cases, so ok to sleep a bit - let sleep_ms = std::cmp::min(100, 20 * count); - std::thread::sleep(std::time::Duration::from_millis(sleep_ms)); - } - } - } -} - -fn verify_tarball_integrity( - package: &PackageNv, - data: &[u8], - npm_integrity: &NpmPackageVersionDistInfoIntegrity, -) -> Result<(), AnyError> { - use ring::digest::Context; - let (tarball_checksum, expected_checksum) = match npm_integrity { - NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm, - base64_hash, - } => { - let algo = match *algorithm { - "sha512" => &ring::digest::SHA512, - "sha1" => &ring::digest::SHA1_FOR_LEGACY_USE_ONLY, - hash_kind => bail!( - "Not implemented hash function for {}: {}", - package, - hash_kind - ), - }; - let mut hash_ctx = Context::new(algo); - hash_ctx.update(data); - let digest = hash_ctx.finish(); - let tarball_checksum = BASE64_STANDARD.encode(digest.as_ref()); - (tarball_checksum, base64_hash) - } - NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(hex) => { - let mut hash_ctx = Context::new(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY); - hash_ctx.update(data); - let digest = hash_ctx.finish(); - let tarball_checksum = faster_hex::hex_string(digest.as_ref()); - (tarball_checksum, hex) - } - NpmPackageVersionDistInfoIntegrity::UnknownIntegrity(integrity) => { - bail!( - "Not implemented integrity kind for {}: {}", - package, - integrity - ) - } - }; - - if tarball_checksum != *expected_checksum { - bail!( - "Tarball checksum did not match what was provided by npm registry for {}.\n\nExpected: {}\nActual: {}", - package, - expected_checksum, - tarball_checksum, - ) - } - Ok(()) -} - -fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> { - fs::create_dir_all(output_folder)?; - let output_folder = fs::canonicalize(output_folder)?; - let tar = 
GzDecoder::new(data); - let mut archive = Archive::new(tar); - archive.set_overwrite(true); - archive.set_preserve_permissions(true); - let mut created_dirs = HashSet::new(); - - for entry in archive.entries()? { - let mut entry = entry?; - let path = entry.path()?; - let entry_type = entry.header().entry_type(); - - // Some package tarballs contain "pax_global_header", these entries - // should be skipped. - if entry_type == EntryType::XGlobalHeader { - continue; - } - - // skip the first component which will be either "package" or the name of the package - let relative_path = path.components().skip(1).collect::(); - let absolute_path = output_folder.join(relative_path); - let dir_path = if entry_type == EntryType::Directory { - absolute_path.as_path() - } else { - absolute_path.parent().unwrap() - }; - if created_dirs.insert(dir_path.to_path_buf()) { - fs::create_dir_all(dir_path)?; - let canonicalized_dir = fs::canonicalize(dir_path)?; - if !canonicalized_dir.starts_with(&output_folder) { - bail!( - "Extracted directory '{}' of npm tarball was not in output directory.", - canonicalized_dir.display() - ) - } - } - - let entry_type = entry.header().entry_type(); - match entry_type { - EntryType::Regular => { - entry.unpack(&absolute_path)?; - } - EntryType::Symlink | EntryType::Link => { - // At the moment, npm doesn't seem to support uploading hardlinks or - // symlinks to the npm registry. If ever adding symlink or hardlink - // support, we will need to validate that the hardlink and symlink - // target are within the package directory. - log::warn!( - "Ignoring npm tarball entry type {:?} for '{}'", - entry_type, - absolute_path.display() - ) - } - _ => { - // ignore - } - } - } - Ok(()) -} - -#[cfg(test)] -mod test { - use deno_semver::Version; - use test_util::TempDir; - - use super::*; - - #[test] - pub fn test_verify_tarball() { - let package = PackageNv { - name: "package".to_string(), - version: Version::parse_from_npm("1.0.0").unwrap(), - }; - let actual_checksum = - "z4PhNX7vuL3xVChQ1m2AB9Yg5AULVxXcg/SpIdNs6c5H0NE8XYXysP+DGNKHfuwvY7kxvUdBeoGlODJ6+SfaPg=="; - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::UnknownIntegrity("test") - ) - .unwrap_err() - .to_string(), - "Not implemented integrity kind for package@1.0.0: test", - ); - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm: "notimplemented", - base64_hash: "test" - } - ) - .unwrap_err() - .to_string(), - "Not implemented hash function for package@1.0.0: notimplemented", - ); - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm: "sha1", - base64_hash: "test" - } - ) - .unwrap_err() - .to_string(), - concat!( - "Tarball checksum did not match what was provided by npm ", - "registry for package@1.0.0.\n\nExpected: test\nActual: 2jmj7l5rSw0yVb/vlWAYkK/YBwk=", - ), - ); - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm: "sha512", - base64_hash: "test" - } - ) - .unwrap_err() - .to_string(), - format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_checksum}"), - ); - assert!(verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::Integrity { - algorithm: "sha512", - base64_hash: actual_checksum, - }, - ) - .is_ok()); - let 
actual_hex = "da39a3ee5e6b4b0d3255bfef95601890afd80709"; - assert_eq!( - verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex("test"), - ) - .unwrap_err() - .to_string(), - format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_hex}"), - ); - assert!(verify_tarball_integrity( - &package, - &Vec::new(), - &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(actual_hex), - ) - .is_ok()); - } - - #[test] - fn rename_with_retries_succeeds_exists() { - let temp_dir = TempDir::new(); - let folder_1 = temp_dir.path().join("folder_1"); - let folder_2 = temp_dir.path().join("folder_2"); - - folder_1.create_dir_all(); - folder_1.join("a.txt").write("test"); - folder_2.create_dir_all(); - // this will not end up in the output as rename_with_retries assumes - // the folders ending up at the destination are the same - folder_2.join("b.txt").write("test2"); - - let dest_folder = temp_dir.path().join("dest_folder"); - - rename_with_retries(folder_1.as_path(), dest_folder.as_path()).unwrap(); - rename_with_retries(folder_2.as_path(), dest_folder.as_path()).unwrap(); - assert!(dest_folder.join("a.txt").exists()); - assert!(!dest_folder.join("b.txt").exists()); - } -} diff --git a/tests/integration/npm_tests.rs b/tests/integration/npm_tests.rs index 923d151be..2c074b86f 100644 --- a/tests/integration/npm_tests.rs +++ b/tests/integration/npm_tests.rs @@ -1665,12 +1665,6 @@ itest!(non_existent_dep_version { http_server: true, exit_code: 1, output_str: Some(concat!( - "[UNORDERED_START]\n", - "Download http://localhost:4260/@denotest/non-existent-dep-version\n", - "Download http://localhost:4260/@denotest/esm-basic\n", - "[UNORDERED_END]\n", - // does two downloads because when failing once it max tries to - // get the latest version a second time "[UNORDERED_START]\n", "Download http://localhost:4260/@denotest/non-existent-dep-version\n", "Download http://localhost:4260/@denotest/esm-basic\n", diff --git a/tests/testdata/npm/deno_run_non_existent.out b/tests/testdata/npm/deno_run_non_existent.out index b769eab8d..247e650e6 100644 --- a/tests/testdata/npm/deno_run_non_existent.out +++ b/tests/testdata/npm/deno_run_non_existent.out @@ -1,3 +1,2 @@ Download http://localhost:4260/mkdirp -Download http://localhost:4260/mkdirp error: Could not find npm package 'mkdirp' matching '0.5.125'. -- cgit v1.2.3
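For reference, the deleted verify_tarball_integrity above handles two registry formats: integrity metadata carrying a base64 digest (sha512, or sha1 for legacy packages) and an old-style bare sha1 hex shasum. A condensed sketch of the two comparisons with the same ring, base64, and faster_hex crates; the empty-input digest constants are taken from the deleted test_verify_tarball:

use base64::prelude::BASE64_STANDARD;
use base64::Engine;
use ring::digest;

// Modern integrity metadata: a base64-encoded digest, sha512 or sha1.
fn matches_integrity(data: &[u8], algorithm: &str, base64_hash: &str) -> bool {
  let algo = match algorithm {
    "sha512" => &digest::SHA512,
    "sha1" => &digest::SHA1_FOR_LEGACY_USE_ONLY,
    _ => return false, // the real code bails with "Not implemented"
  };
  let mut ctx = digest::Context::new(algo);
  ctx.update(data);
  BASE64_STANDARD.encode(ctx.finish().as_ref()) == base64_hash
}

// Legacy packuments only carry a hex-encoded sha1 shasum.
fn matches_legacy_sha1_hex(data: &[u8], hex: &str) -> bool {
  let mut ctx = digest::Context::new(&digest::SHA1_FOR_LEGACY_USE_ONLY);
  ctx.update(data);
  faster_hex::hex_string(ctx.finish().as_ref()) == hex
}

fn main() {
  // Empty-input digests, as asserted by the deleted test_verify_tarball.
  let sha512_empty =
    "z4PhNX7vuL3xVChQ1m2AB9Yg5AULVxXcg/SpIdNs6c5H0NE8XYXysP+DGNKHfuwvY7kxvUdBeoGlODJ6+SfaPg==";
  let sha1_empty_hex = "da39a3ee5e6b4b0d3255bfef95601890afd80709";
  assert!(matches_integrity(&[], "sha512", sha512_empty));
  assert!(matches_legacy_sha1_hex(&[], sha1_empty_hex));
  assert!(!matches_integrity(b"tampered", "sha512", sha512_empty));
}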