diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2024-06-02 21:39:13 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-03 01:39:13 +0000 |
commit | b1f776adef6f0d0caa0b2badf9fb707cf5efa6e7 (patch) | |
tree | df801e53bb5e43268933d883f049546256ef8e7f /cli/npm | |
parent | eda43c46de12ed589fdbe62ba0574887cfbb3574 (diff) |
refactor: extract structs for downloading tarballs and npm registry packuments (#24067)
Diffstat (limited to 'cli/npm')
-rw-r--r-- | cli/npm/cache_dir.rs | 4 | ||||
-rw-r--r-- | cli/npm/managed/cache/mod.rs (renamed from cli/npm/managed/cache.rs) | 159 | ||||
-rw-r--r-- | cli/npm/managed/cache/registry_info.rs | 284 | ||||
-rw-r--r-- | cli/npm/managed/cache/tarball.rs | 210 | ||||
-rw-r--r-- | cli/npm/managed/cache/tarball_extract.rs (renamed from cli/npm/managed/tarball.rs) | 0 | ||||
-rw-r--r-- | cli/npm/managed/mod.rs | 127 | ||||
-rw-r--r-- | cli/npm/managed/registry.rs | 181 | ||||
-rw-r--r-- | cli/npm/managed/resolvers/common.rs | 28 | ||||
-rw-r--r-- | cli/npm/managed/resolvers/global.rs | 26 | ||||
-rw-r--r-- | cli/npm/managed/resolvers/local.rs | 62 | ||||
-rw-r--r-- | cli/npm/managed/resolvers/mod.rs | 19 |
11 files changed, 720 insertions, 380 deletions
diff --git a/cli/npm/cache_dir.rs b/cli/npm/cache_dir.rs index d51913775..1f1f5e956 100644 --- a/cli/npm/cache_dir.rs +++ b/cli/npm/cache_dir.rs @@ -72,7 +72,7 @@ impl NpmCacheDir { registry_url: &Url, ) -> PathBuf { if folder_id.copy_index == 0 { - self.package_folder_for_name_and_version(&folder_id.nv, registry_url) + self.package_folder_for_nv(&folder_id.nv, registry_url) } else { self .package_name_folder(&folder_id.nv.name, registry_url) @@ -80,7 +80,7 @@ impl NpmCacheDir { } } - pub fn package_folder_for_name_and_version( + pub fn package_folder_for_nv( &self, package: &PackageNv, registry_url: &Url, diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache/mod.rs index 4056c97ad..f409744b9 100644 --- a/cli/npm/managed/cache.rs +++ b/cli/npm/managed/cache/mod.rs @@ -2,6 +2,7 @@ use std::collections::HashSet; use std::fs; +use std::io::ErrorKind; use std::path::Path; use std::path::PathBuf; use std::sync::Arc; @@ -9,35 +10,34 @@ use std::sync::Arc; use deno_ast::ModuleSpecifier; use deno_core::anyhow::bail; use deno_core::anyhow::Context; -use deno_core::error::custom_error; use deno_core::error::AnyError; use deno_core::parking_lot::Mutex; +use deno_core::serde_json; use deno_core::url::Url; use deno_npm::npm_rc::ResolvedNpmRc; -use deno_npm::registry::NpmPackageVersionDistInfo; +use deno_npm::registry::NpmPackageInfo; use deno_npm::NpmPackageCacheFolderId; -use deno_runtime::deno_fs; use deno_semver::package::PackageNv; use crate::args::CacheSetting; -use crate::http_util::HttpClient; -use crate::npm::common::maybe_auth_header_for_npm_registry; +use crate::cache::CACHE_PERM; use crate::npm::NpmCacheDir; +use crate::util::fs::atomic_write_file_with_retries; use crate::util::fs::hard_link_dir_recursive; -use crate::util::progress_bar::ProgressBar; -use super::tarball::verify_and_extract_tarball; -use super::tarball::TarballExtractionMode; +mod registry_info; +mod tarball; +mod tarball_extract; + +pub use registry_info::RegistryInfoDownloader; +pub use tarball::TarballCache; /// Stores a single copy of npm packages in a cache. #[derive(Debug)] pub struct NpmCache { cache_dir: NpmCacheDir, cache_setting: CacheSetting, - fs: Arc<dyn deno_fs::FileSystem>, - http_client: Arc<HttpClient>, - progress_bar: ProgressBar, - pub(crate) npmrc: Arc<ResolvedNpmRc>, + npmrc: Arc<ResolvedNpmRc>, /// ensures a package is only downloaded once per run previously_reloaded_packages: Mutex<HashSet<PackageNv>>, } @@ -46,17 +46,11 @@ impl NpmCache { pub fn new( cache_dir: NpmCacheDir, cache_setting: CacheSetting, - fs: Arc<dyn deno_fs::FileSystem>, - http_client: Arc<HttpClient>, - progress_bar: ProgressBar, npmrc: Arc<ResolvedNpmRc>, ) -> Self { Self { cache_dir, cache_setting, - fs, - http_client, - progress_bar, previously_reloaded_packages: Default::default(), npmrc, } @@ -75,7 +69,7 @@ impl NpmCache { /// to ensure a package is only downloaded once per run of the CLI. This /// prevents downloads from re-occurring when someone has `--reload` and /// and imports a dynamic import that imports the same package again for example. - fn should_use_cache_for_package(&self, package: &PackageNv) -> bool { + pub fn should_use_cache_for_package(&self, package: &PackageNv) -> bool { self.cache_setting.should_use_for_npm_package(&package.name) || !self .previously_reloaded_packages @@ -83,88 +77,6 @@ impl NpmCache { .insert(package.clone()) } - pub async fn ensure_package( - &self, - package: &PackageNv, - dist: &NpmPackageVersionDistInfo, - ) -> Result<(), AnyError> { - self - .ensure_package_inner(package, dist) - .await - .with_context(|| format!("Failed caching npm package '{package}'.")) - } - - async fn ensure_package_inner( - &self, - package_nv: &PackageNv, - dist: &NpmPackageVersionDistInfo, - ) -> Result<(), AnyError> { - let registry_url = self.npmrc.get_registry_url(&package_nv.name); - let registry_config = self.npmrc.get_registry_config(&package_nv.name); - - let package_folder = self - .cache_dir - .package_folder_for_name_and_version(package_nv, registry_url); - let should_use_cache = self.should_use_cache_for_package(package_nv); - let package_folder_exists = self.fs.exists_sync(&package_folder); - if should_use_cache && package_folder_exists { - return Ok(()); - } else if self.cache_setting == CacheSetting::Only { - return Err(custom_error( - "NotCached", - format!( - "An npm specifier not found in cache: \"{}\", --cached-only is specified.", - &package_nv.name - ) - ) - ); - } - - if dist.tarball.is_empty() { - bail!("Tarball URL was empty."); - } - - let maybe_auth_header = maybe_auth_header_for_npm_registry(registry_config); - - let guard = self.progress_bar.update(&dist.tarball); - let maybe_bytes = self - .http_client - .download_with_progress(&dist.tarball, maybe_auth_header, &guard) - .await?; - match maybe_bytes { - Some(bytes) => { - let extraction_mode = if should_use_cache || !package_folder_exists { - TarballExtractionMode::SiblingTempDir - } else { - // The user ran with `--reload`, so overwrite the package instead of - // deleting it since the package might get corrupted if a user kills - // their deno process while it's deleting a package directory - // - // We can't rename this folder and delete it because the folder - // may be in use by another process or may now contain hardlinks, - // which will cause windows to throw an "AccessDenied" error when - // renaming. So we settle for overwriting. - TarballExtractionMode::Overwrite - }; - let dist = dist.clone(); - let package_nv = package_nv.clone(); - deno_core::unsync::spawn_blocking(move || { - verify_and_extract_tarball( - &package_nv, - &bytes, - &dist, - &package_folder, - extraction_mode, - ) - }) - .await? - } - None => { - bail!("Could not find npm package tarball at: {}", dist.tarball); - } - } - } - /// Ensures a copy of the package exists in the global cache. /// /// This assumes that the original package folder being hard linked @@ -190,7 +102,7 @@ impl NpmCache { let original_package_folder = self .cache_dir - .package_folder_for_name_and_version(&folder_id.nv, registry_url); + .package_folder_for_nv(&folder_id.nv, registry_url); // it seems Windows does an "AccessDenied" error when moving a // directory with hard links, so that's why this solution is done @@ -205,14 +117,17 @@ impl NpmCache { self.cache_dir.package_folder_for_id(id, registry_url) } - pub fn package_folder_for_name_and_version( + pub fn package_folder_for_nv(&self, package: &PackageNv) -> PathBuf { + let registry_url = self.npmrc.get_registry_url(&package.name); + self.package_folder_for_nv_and_url(package, registry_url) + } + + pub fn package_folder_for_nv_and_url( &self, package: &PackageNv, + registry_url: &Url, ) -> PathBuf { - let registry_url = self.npmrc.get_registry_url(&package.name); - self - .cache_dir - .package_folder_for_name_and_version(package, registry_url) + self.cache_dir.package_folder_for_nv(package, registry_url) } pub fn package_name_folder(&self, name: &str) -> PathBuf { @@ -232,6 +147,36 @@ impl NpmCache { .cache_dir .resolve_package_folder_id_from_specifier(specifier) } + + pub fn load_package_info( + &self, + name: &str, + ) -> Result<Option<NpmPackageInfo>, AnyError> { + let file_cache_path = self.get_registry_package_info_file_cache_path(name); + + let file_text = match fs::read_to_string(file_cache_path) { + Ok(file_text) => file_text, + Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err.into()), + }; + Ok(serde_json::from_str(&file_text)?) + } + + pub fn save_package_info( + &self, + name: &str, + package_info: &NpmPackageInfo, + ) -> Result<(), AnyError> { + let file_cache_path = self.get_registry_package_info_file_cache_path(name); + let file_text = serde_json::to_string(&package_info)?; + atomic_write_file_with_retries(&file_cache_path, file_text, CACHE_PERM)?; + Ok(()) + } + + fn get_registry_package_info_file_cache_path(&self, name: &str) -> PathBuf { + let name_folder_path = self.package_name_folder(name); + name_folder_path.join("registry.json") + } } const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock"; diff --git a/cli/npm/managed/cache/registry_info.rs b/cli/npm/managed/cache/registry_info.rs new file mode 100644 index 000000000..ea6b47969 --- /dev/null +++ b/cli/npm/managed/cache/registry_info.rs @@ -0,0 +1,284 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashMap; +use std::sync::Arc; + +use deno_core::anyhow::anyhow; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; +use deno_core::parking_lot::Mutex; +use deno_core::serde_json; +use deno_core::url::Url; +use deno_npm::npm_rc::RegistryConfig; +use deno_npm::npm_rc::ResolvedNpmRc; +use deno_npm::registry::NpmPackageInfo; + +use crate::args::CacheSetting; +use crate::http_util::HttpClient; +use crate::npm::common::maybe_auth_header_for_npm_registry; +use crate::util::progress_bar::ProgressBar; + +use super::NpmCache; + +// todo(dsherret): create seams and unit test this + +#[derive(Debug, Clone)] +enum MemoryCacheItem { + /// The cache item hasn't loaded yet. + PendingFuture(Shared<PendingRegistryLoadFuture>), + /// The item has loaded in the past and was stored in the file system cache. + /// There is no reason to request this package from the npm registry again + /// for the duration of execution. + FsCached, + /// An item is memory cached when it fails saving to the file system cache + /// or the package does not exist. + MemoryCached(Result<Option<Arc<NpmPackageInfo>>, Arc<AnyError>>), +} + +#[derive(Debug, Clone)] +enum FutureResult { + PackageNotExists, + SavedFsCache(Arc<NpmPackageInfo>), + ErroredFsCache(Arc<NpmPackageInfo>), +} + +type PendingRegistryLoadFuture = + BoxFuture<'static, Result<FutureResult, Arc<AnyError>>>; + +/// Downloads packuments from the npm registry. +/// +/// This is shared amongst all the workers. +#[derive(Debug)] +pub struct RegistryInfoDownloader { + cache: Arc<NpmCache>, + npmrc: Arc<ResolvedNpmRc>, + progress_bar: ProgressBar, + memory_cache: Mutex<HashMap<String, MemoryCacheItem>>, +} + +impl RegistryInfoDownloader { + pub fn new( + cache: Arc<NpmCache>, + npmrc: Arc<ResolvedNpmRc>, + progress_bar: ProgressBar, + ) -> Self { + Self { + cache, + npmrc, + progress_bar, + memory_cache: Default::default(), + } + } + + pub async fn load_package_info( + &self, + name: &str, + current_runtime_http_client: &Arc<HttpClient>, + ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { + let registry_url = self.npmrc.get_registry_url(name); + let registry_config = self.npmrc.get_registry_config(name); + + self + .load_package_info_inner( + name, + registry_url, + registry_config, + current_runtime_http_client, + ) + .await + .with_context(|| { + format!( + "Error getting response at {} for package \"{}\"", + self.get_package_url(name, registry_url), + name + ) + }) + } + + async fn load_package_info_inner( + &self, + name: &str, + registry_url: &Url, + registry_config: &RegistryConfig, + current_runtime_http_client: &Arc<HttpClient>, + ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { + if *self.cache.cache_setting() == CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{name}\", --cached-only is specified." + ) + )); + } + + let (created, cache_item) = { + let mut mem_cache = self.memory_cache.lock(); + if let Some(cache_item) = mem_cache.get(name) { + (false, cache_item.clone()) + } else { + let future = self.create_load_future( + name, + registry_url, + registry_config, + current_runtime_http_client, + ); + let cache_item = MemoryCacheItem::PendingFuture(future); + mem_cache.insert(name.to_string(), cache_item.clone()); + (true, cache_item) + } + }; + match cache_item { + MemoryCacheItem::FsCached => { + // this struct previously loaded from the registry, so we can load it from the file system cache + self + .load_file_cached_package_info(name) + .await + .map(|info| Some(Arc::new(info))) + } + MemoryCacheItem::MemoryCached(maybe_info) => { + maybe_info.clone().map_err(|e| anyhow!("{}", e)) + } + MemoryCacheItem::PendingFuture(future) => { + if created { + match future.await { + Ok(FutureResult::SavedFsCache(info)) => { + // return back the future and mark this package as having + // been saved in the cache for next time it's requested + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::FsCached; + Ok(Some(info)) + } + Ok(FutureResult::ErroredFsCache(info)) => { + // since saving to the fs cache failed, keep the package information in memory + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Ok(Some(info.clone()))); + Ok(Some(info)) + } + Ok(FutureResult::PackageNotExists) => { + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Ok(None)); + Ok(None) + } + Err(err) => { + let return_err = anyhow!("{}", err); + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Err(err)); + Err(return_err) + } + } + } else { + match future.await { + Ok(FutureResult::SavedFsCache(info)) => Ok(Some(info)), + Ok(FutureResult::ErroredFsCache(info)) => Ok(Some(info)), + Ok(FutureResult::PackageNotExists) => Ok(None), + Err(err) => Err(anyhow!("{}", err)), + } + } + } + } + } + + async fn load_file_cached_package_info( + &self, + name: &str, + ) -> Result<NpmPackageInfo, AnyError> { + // this scenario failing should be exceptionally rare so let's + // deal with improving it only when anyone runs into an issue + let maybe_package_info = deno_core::unsync::spawn_blocking({ + let cache = self.cache.clone(); + let name = name.to_string(); + move || cache.load_package_info(&name) + }) + .await + .unwrap() + .with_context(|| { + format!( + "Previously saved '{}' from the npm cache, but now it fails to load.", + name + ) + })?; + match maybe_package_info { + Some(package_info) => Ok(package_info), + None => { + bail!("The package '{}' previously saved its registry information to the file system cache, but that file no longer exists.", name) + } + } + } + + fn create_load_future( + &self, + name: &str, + registry_url: &Url, + registry_config: &RegistryConfig, + current_runtime_http_client: &Arc<HttpClient>, + ) -> Shared<PendingRegistryLoadFuture> { + let package_url = self.get_package_url(name, registry_url); + let maybe_auth_header = maybe_auth_header_for_npm_registry(registry_config); + let guard = self.progress_bar.update(package_url.as_str()); + let cache = self.cache.clone(); + let http_client = current_runtime_http_client.clone(); + let name = name.to_string(); + // force this future to be polled on the current runtime because it's not + // safe to share `HttpClient`s across runtimes and because a restart of + // npm resolution might cause this package not to be resolved again + // causing the future to never be polled + deno_core::unsync::spawn(async move { + let maybe_bytes = http_client + .download_with_progress(package_url, maybe_auth_header, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + let future_result = deno_core::unsync::spawn_blocking( + move || -> Result<FutureResult, AnyError> { + let package_info = serde_json::from_slice(&bytes)?; + match cache.save_package_info(&name, &package_info) { + Ok(()) => { + Ok(FutureResult::SavedFsCache(Arc::new(package_info))) + } + Err(err) => { + log::debug!( + "Error saving package {} to cache: {:#}", + name, + err + ); + Ok(FutureResult::ErroredFsCache(Arc::new(package_info))) + } + } + }, + ) + .await??; + Ok(future_result) + } + None => Ok(FutureResult::PackageNotExists), + } + }) + .map(|result| result.unwrap().map_err(Arc::new)) + .boxed() + .shared() + } + + fn get_package_url(&self, name: &str, registry_url: &Url) -> Url { + // list of all characters used in npm packages: + // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ + const ASCII_SET: percent_encoding::AsciiSet = + percent_encoding::NON_ALPHANUMERIC + .remove(b'!') + .remove(b'\'') + .remove(b'(') + .remove(b')') + .remove(b'*') + .remove(b'-') + .remove(b'.') + .remove(b'/') + .remove(b'@') + .remove(b'_') + .remove(b'~'); + let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); + registry_url.join(&name.to_string()).unwrap() + } +} diff --git a/cli/npm/managed/cache/tarball.rs b/cli/npm/managed/cache/tarball.rs new file mode 100644 index 000000000..9848aca13 --- /dev/null +++ b/cli/npm/managed/cache/tarball.rs @@ -0,0 +1,210 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashMap; +use std::sync::Arc; + +use deno_core::anyhow::anyhow; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; +use deno_core::parking_lot::Mutex; +use deno_npm::npm_rc::ResolvedNpmRc; +use deno_npm::registry::NpmPackageVersionDistInfo; +use deno_runtime::deno_fs::FileSystem; +use deno_semver::package::PackageNv; + +use crate::args::CacheSetting; +use crate::http_util::HttpClient; +use crate::npm::common::maybe_auth_header_for_npm_registry; +use crate::util::progress_bar::ProgressBar; + +use super::tarball_extract::verify_and_extract_tarball; +use super::tarball_extract::TarballExtractionMode; +use super::NpmCache; + +// todo(dsherret): create seams and unit test this + +#[derive(Debug, Clone)] +enum MemoryCacheItem { + /// The cache item hasn't finished yet. + PendingFuture(Shared<BoxFuture<'static, Result<(), Arc<AnyError>>>>), + /// The result errored. + Errored(Arc<AnyError>), + /// This package has already been cached. + Cached, +} + +/// Coordinates caching of tarballs being loaded from +/// the npm registry. +/// +/// This is shared amongst all the workers. +#[derive(Debug)] +pub struct TarballCache { + cache: Arc<NpmCache>, + fs: Arc<dyn FileSystem>, + npmrc: Arc<ResolvedNpmRc>, + progress_bar: ProgressBar, + memory_cache: Mutex<HashMap<PackageNv, MemoryCacheItem>>, +} + +impl TarballCache { + pub fn new( + cache: Arc<NpmCache>, + fs: Arc<dyn FileSystem>, + npmrc: Arc<ResolvedNpmRc>, + progress_bar: ProgressBar, + ) -> Self { + Self { + cache, + fs, + npmrc, + progress_bar, + memory_cache: Default::default(), + } + } + + pub async fn ensure_package( + &self, + package: &PackageNv, + dist: &NpmPackageVersionDistInfo, + // it's not safe to share these across runtimes + http_client_for_runtime: &Arc<HttpClient>, + ) -> Result<(), AnyError> { + self + .ensure_package_inner(package, dist, http_client_for_runtime) + .await + .with_context(|| format!("Failed caching npm package '{}'.", package)) + } + + async fn ensure_package_inner( + &self, + package_nv: &PackageNv, + dist: &NpmPackageVersionDistInfo, + http_client_for_runtime: &Arc<HttpClient>, + ) -> Result<(), AnyError> { + let (created, cache_item) = { + let mut mem_cache = self.memory_cache.lock(); + if let Some(cache_item) = mem_cache.get(package_nv) { + (false, cache_item.clone()) + } else { + let future = self.create_setup_future( + package_nv.clone(), + dist.clone(), + http_client_for_runtime.clone(), + ); + let cache_item = MemoryCacheItem::PendingFuture(future); + mem_cache.insert(package_nv.clone(), cache_item.clone()); + (true, cache_item) + } + }; + + match cache_item { + MemoryCacheItem::Cached => Ok(()), + MemoryCacheItem::Errored(err) => Err(anyhow!("{}", err)), + MemoryCacheItem::PendingFuture(future) => { + if created { + match future.await { + Ok(_) => { + *self.memory_cache.lock().get_mut(package_nv).unwrap() = + MemoryCacheItem::Cached; + Ok(()) + } + Err(err) => { + let result_err = anyhow!("{}", err); + *self.memory_cache.lock().get_mut(package_nv).unwrap() = + MemoryCacheItem::Errored(err); + Err(result_err) + } + } + } else { + future.await.map_err(|err| anyhow!("{}", err)) + } + } + } + } + + fn create_setup_future( + &self, + package_nv: PackageNv, + dist: NpmPackageVersionDistInfo, + http_client_for_runtime: Arc<HttpClient>, + ) -> Shared<BoxFuture<'static, Result<(), Arc<AnyError>>>> { + let registry_url = self.npmrc.get_registry_url(&package_nv.name); + let registry_config = + self.npmrc.get_registry_config(&package_nv.name).clone(); + + let cache = self.cache.clone(); + let fs = self.fs.clone(); + let progress_bar = self.progress_bar.clone(); + let package_folder = + cache.package_folder_for_nv_and_url(&package_nv, registry_url); + + deno_core::unsync::spawn(async move { + let should_use_cache = cache.should_use_cache_for_package(&package_nv); + let package_folder_exists = fs.exists_sync(&package_folder); + if should_use_cache && package_folder_exists { + return Ok(()); + } else if cache.cache_setting() == &CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{}\", --cached-only is specified.", + &package_nv.name + ) + ) + ); + } + + if dist.tarball.is_empty() { + bail!("Tarball URL was empty."); + } + + let maybe_auth_header = + maybe_auth_header_for_npm_registry(®istry_config); + + let guard = progress_bar.update(&dist.tarball); + let maybe_bytes = http_client_for_runtime + .download_with_progress(&dist.tarball, maybe_auth_header, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + let extraction_mode = if should_use_cache || !package_folder_exists { + TarballExtractionMode::SiblingTempDir + } else { + // The user ran with `--reload`, so overwrite the package instead of + // deleting it since the package might get corrupted if a user kills + // their deno process while it's deleting a package directory + // + // We can't rename this folder and delete it because the folder + // may be in use by another process or may now contain hardlinks, + // which will cause windows to throw an "AccessDenied" error when + // renaming. So we settle for overwriting. + TarballExtractionMode::Overwrite + }; + let dist = dist.clone(); + let package_nv = package_nv.clone(); + deno_core::unsync::spawn_blocking(move || { + verify_and_extract_tarball( + &package_nv, + &bytes, + &dist, + &package_folder, + extraction_mode, + ) + }) + .await? + } + None => { + bail!("Could not find npm package tarball at: {}", dist.tarball); + } + } + }) + .map(|result| result.unwrap().map_err(Arc::new)) + .boxed() + .shared() + } +} diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/cache/tarball_extract.rs index e2d242e66..e2d242e66 100644 --- a/cli/npm/managed/tarball.rs +++ b/cli/npm/managed/cache/tarball_extract.rs diff --git a/cli/npm/managed/mod.rs b/cli/npm/managed/mod.rs index 95d4dca0f..718806ced 100644 --- a/cli/npm/managed/mod.rs +++ b/cli/npm/managed/mod.rs @@ -4,6 +4,7 @@ use std::path::Path; use std::path::PathBuf; use std::sync::Arc; +use cache::RegistryInfoDownloader; use deno_ast::ModuleSpecifier; use deno_core::anyhow::Context; use deno_core::error::AnyError; @@ -30,6 +31,7 @@ use crate::args::NpmProcessState; use crate::args::NpmProcessStateKind; use crate::args::PackageJsonDepsProvider; use crate::cache::FastInsecureHasher; +use crate::http_util::HttpClient; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; use crate::util::progress_bar::ProgressBar; @@ -49,7 +51,6 @@ mod installer; mod registry; mod resolution; mod resolvers; -mod tarball; pub enum CliNpmResolverManagedSnapshotOption { ResolveFromLockfile(Arc<Mutex<Lockfile>>), @@ -88,15 +89,17 @@ pub async fn create_managed_npm_resolver_for_lsp( } }; create_inner( - npm_cache, - npm_api, - snapshot, - options.maybe_lockfile, options.fs, + options.http_client, + options.maybe_lockfile, + npm_api, + npm_cache, + options.npmrc, + options.package_json_installer, options.text_only_progress_bar, options.maybe_node_modules_path, - options.package_json_installer, options.npm_system_info, + snapshot, ) } @@ -107,38 +110,43 @@ pub async fn create_managed_npm_resolver( let npm_api = create_api(&options, npm_cache.clone()); let snapshot = resolve_snapshot(&npm_api, options.snapshot).await?; Ok(create_inner( - npm_cache, - npm_api, - snapshot, - options.maybe_lockfile, options.fs, + options.http_client, + options.maybe_lockfile, + npm_api, + npm_cache, + options.npmrc, + options.package_json_installer, options.text_only_progress_bar, options.maybe_node_modules_path, - options.package_json_installer, options.npm_system_info, + snapshot, )) } #[allow(clippy::too_many_arguments)] fn create_inner( - npm_cache: Arc<NpmCache>, - npm_api: Arc<CliNpmRegistryApi>, - snapshot: Option<ValidSerializedNpmResolutionSnapshot>, - maybe_lockfile: Option<Arc<Mutex<Lockfile>>>, fs: Arc<dyn deno_runtime::deno_fs::FileSystem>, + http_client: Arc<HttpClient>, + maybe_lockfile: Option<Arc<Mutex<Lockfile>>>, + npm_api: Arc<CliNpmRegistryApi>, + npm_cache: Arc<NpmCache>, + npm_rc: Arc<ResolvedNpmRc>, + package_json_installer: CliNpmResolverManagedPackageJsonInstallerOption, text_only_progress_bar: crate::util::progress_bar::ProgressBar, node_modules_dir_path: Option<PathBuf>, - package_json_installer: CliNpmResolverManagedPackageJsonInstallerOption, npm_system_info: NpmSystemInfo, + snapshot: Option<ValidSerializedNpmResolutionSnapshot>, ) -> Arc<dyn CliNpmResolver> { let resolution = Arc::new(NpmResolution::from_serialized( npm_api.clone(), snapshot, maybe_lockfile.clone(), )); - let npm_fs_resolver = create_npm_fs_resolver( + let fs_resolver = create_npm_fs_resolver( fs.clone(), npm_cache.clone(), + npm_rc.clone(), &text_only_progress_bar, resolution.clone(), node_modules_dir_path, @@ -157,13 +165,15 @@ fn create_inner( } }; Arc::new(ManagedCliNpmResolver::new( - npm_api, fs, - resolution, - npm_fs_resolver, - npm_cache, + fs_resolver, + http_client, maybe_lockfile, + npm_api, + npm_cache, + npm_rc, package_json_deps_installer, + resolution, text_only_progress_bar, npm_system_info, )) @@ -176,9 +186,6 @@ fn create_cache(options: &CliNpmResolverManagedCreateOptions) -> Arc<NpmCache> { options.npmrc.get_all_known_registries_urls(), ), options.cache_setting.clone(), - options.fs.clone(), - options.http_client.clone(), - options.text_only_progress_bar.clone(), options.npmrc.clone(), )) } @@ -190,8 +197,11 @@ fn create_api( Arc::new(CliNpmRegistryApi::new( npm_cache.clone(), options.http_client.clone(), - options.npmrc.clone(), - options.text_only_progress_bar.clone(), + RegistryInfoDownloader::new( + npm_cache, + options.npmrc.clone(), + options.text_only_progress_bar.clone(), + ), )) } @@ -210,8 +220,6 @@ async fn resolve_snapshot( lockfile.lock().filename.display() ) })?; - // clear the memory cache to reduce memory usage - api.clear_memory_cache(); Ok(Some(snapshot)) } else { Ok(None) @@ -246,15 +254,17 @@ async fn snapshot_from_lockfile( /// An npm resolver where the resolution is managed by Deno rather than /// the user bringing their own node_modules (BYONM) on the file system. pub struct ManagedCliNpmResolver { - api: Arc<CliNpmRegistryApi>, fs: Arc<dyn FileSystem>, fs_resolver: Arc<dyn NpmPackageFsResolver>, - global_npm_cache: Arc<NpmCache>, - resolution: Arc<NpmResolution>, + http_client: Arc<HttpClient>, maybe_lockfile: Option<Arc<Mutex<Lockfile>>>, - npm_system_info: NpmSystemInfo, - progress_bar: ProgressBar, + npm_api: Arc<CliNpmRegistryApi>, + npm_cache: Arc<NpmCache>, + npm_rc: Arc<ResolvedNpmRc>, package_json_deps_installer: Arc<PackageJsonDepsInstaller>, + resolution: Arc<NpmResolution>, + text_only_progress_bar: ProgressBar, + npm_system_info: NpmSystemInfo, } impl std::fmt::Debug for ManagedCliNpmResolver { @@ -268,25 +278,29 @@ impl std::fmt::Debug for ManagedCliNpmResolver { impl ManagedCliNpmResolver { #[allow(clippy::too_many_arguments)] pub fn new( - api: Arc<CliNpmRegistryApi>, fs: Arc<dyn FileSystem>, - resolution: Arc<NpmResolution>, fs_resolver: Arc<dyn NpmPackageFsResolver>, - global_npm_cache: Arc<NpmCache>, + http_client: Arc<HttpClient>, maybe_lockfile: Option<Arc<Mutex<Lockfile>>>, + npm_api: Arc<CliNpmRegistryApi>, + npm_cache: Arc<NpmCache>, + npm_rc: Arc<ResolvedNpmRc>, package_json_deps_installer: Arc<PackageJsonDepsInstaller>, - progress_bar: ProgressBar, + resolution: Arc<NpmResolution>, + text_only_progress_bar: ProgressBar, npm_system_info: NpmSystemInfo, ) -> Self { Self { - api, fs, fs_resolver, - global_npm_cache, - resolution, + http_client, maybe_lockfile, + npm_api, + npm_cache, + npm_rc, package_json_deps_installer, - progress_bar, + text_only_progress_bar, + resolution, npm_system_info, } } @@ -367,7 +381,7 @@ impl ManagedCliNpmResolver { } self.resolution.add_package_reqs(packages).await?; - self.fs_resolver.cache_packages().await?; + self.fs_resolver.cache_packages(&self.http_client).await?; // If there's a lock file, update it with all discovered npm packages if let Some(lockfile) = &self.maybe_lockfile { @@ -410,7 +424,7 @@ impl ManagedCliNpmResolver { // add and ensure this isn't added to the lockfile let package_reqs = vec![PackageReq::from_str("@types/node").unwrap()]; self.resolution.add_package_reqs(&package_reqs).await?; - self.fs_resolver.cache_packages().await?; + self.cache_packages().await?; Ok(()) } @@ -421,7 +435,7 @@ impl ManagedCliNpmResolver { } pub async fn cache_packages(&self) -> Result<(), AnyError> { - self.fs_resolver.cache_packages().await + self.fs_resolver.cache_packages(&self.http_client).await } /// Resolves a package requirement for deno graph. This should only be @@ -435,7 +449,7 @@ impl ManagedCliNpmResolver { match result { Ok(nv) => NpmPackageReqResolution::Ok(nv), Err(err) => { - if self.api.mark_force_reload() { + if self.npm_api.mark_force_reload() { log::debug!("Restarting npm specifier resolution to check for new registry information. Error: {:#}", err); NpmPackageReqResolution::ReloadRegistryInfo(err.into()) } else { @@ -475,7 +489,7 @@ impl ManagedCliNpmResolver { ) -> Result<(), AnyError> { // this will internally cache the package information self - .api + .npm_api .package_info(package_name) .await .map(|_| ()) @@ -483,7 +497,7 @@ impl ManagedCliNpmResolver { } pub fn global_cache_root_folder(&self) -> PathBuf { - self.global_npm_cache.root_folder() + self.npm_cache.root_folder() } } @@ -543,27 +557,30 @@ impl CliNpmResolver for ManagedCliNpmResolver { fn clone_snapshotted(&self) -> Arc<dyn CliNpmResolver> { // create a new snapshotted npm resolution and resolver let npm_resolution = Arc::new(NpmResolution::new( - self.api.clone(), + self.npm_api.clone(), self.resolution.snapshot(), self.maybe_lockfile.clone(), )); Arc::new(ManagedCliNpmResolver::new( - self.api.clone(), self.fs.clone(), - npm_resolution.clone(), create_npm_fs_resolver( self.fs.clone(), - self.global_npm_cache.clone(), - &self.progress_bar, - npm_resolution, + self.npm_cache.clone(), + self.npm_rc.clone(), + &self.text_only_progress_bar, + npm_resolution.clone(), self.root_node_modules_path().map(ToOwned::to_owned), self.npm_system_info.clone(), ), - self.global_npm_cache.clone(), + self.http_client.clone(), self.maybe_lockfile.clone(), + self.npm_api.clone(), + self.npm_cache.clone(), + self.npm_rc.clone(), self.package_json_deps_installer.clone(), - self.progress_bar.clone(), + npm_resolution, + self.text_only_progress_bar.clone(), self.npm_system_info.clone(), )) } diff --git a/cli/npm/managed/registry.rs b/cli/npm/managed/registry.rs index 861ce2a4b..364529ed2 100644 --- a/cli/npm/managed/registry.rs +++ b/cli/npm/managed/registry.rs @@ -2,38 +2,27 @@ use std::collections::HashMap; use std::collections::HashSet; -use std::fs; -use std::io::ErrorKind; -use std::path::PathBuf; use std::sync::Arc; use async_trait::async_trait; use deno_core::anyhow::anyhow; -use deno_core::anyhow::Context; -use deno_core::error::custom_error; use deno_core::error::AnyError; use deno_core::futures::future::BoxFuture; use deno_core::futures::future::Shared; use deno_core::futures::FutureExt; use deno_core::parking_lot::Mutex; -use deno_core::serde_json; -use deno_core::url::Url; -use deno_npm::npm_rc::RegistryConfig; -use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::registry::NpmPackageInfo; use deno_npm::registry::NpmRegistryApi; use deno_npm::registry::NpmRegistryPackageInfoLoadError; use crate::args::CacheSetting; -use crate::cache::CACHE_PERM; use crate::http_util::HttpClient; -use crate::npm::common::maybe_auth_header_for_npm_registry; -use crate::util::fs::atomic_write_file; -use crate::util::progress_bar::ProgressBar; use crate::util::sync::AtomicFlag; use super::cache::NpmCache; +use super::cache::RegistryInfoDownloader; +// todo(dsherret): make this per worker and make HttpClient an Rc<HttpClient> #[derive(Debug)] pub struct CliNpmRegistryApi(Option<Arc<CliNpmRegistryApiInner>>); @@ -41,17 +30,15 @@ impl CliNpmRegistryApi { pub fn new( cache: Arc<NpmCache>, http_client: Arc<HttpClient>, - npmrc: Arc<ResolvedNpmRc>, - progress_bar: ProgressBar, + registry_info_downloader: RegistryInfoDownloader, ) -> Self { Self(Some(Arc::new(CliNpmRegistryApiInner { cache, force_reload_flag: Default::default(), mem_cache: Default::default(), previously_reloaded_packages: Default::default(), - npmrc, http_client, - progress_bar, + registry_info_downloader, }))) } @@ -125,8 +112,7 @@ struct CliNpmRegistryApiInner { mem_cache: Mutex<HashMap<String, CacheItem>>, previously_reloaded_packages: Mutex<HashSet<String>>, http_client: Arc<HttpClient>, - npmrc: Arc<ResolvedNpmRc>, - progress_bar: ProgressBar, + registry_info_downloader: RegistryInfoDownloader, } impl CliNpmRegistryApiInner { @@ -157,10 +143,9 @@ impl CliNpmRegistryApiInner { return Ok(result); } } - api - .load_package_info_from_registry(&name) + api.registry_info_downloader + .load_package_info(&name, &api.http_client) .await - .map(|info| info.map(Arc::new)) .map_err(Arc::new) } .boxed() @@ -202,7 +187,14 @@ impl CliNpmRegistryApiInner { &self, name: &str, ) -> Option<NpmPackageInfo> { - match self.load_file_cached_package_info_result(name).await { + let result = deno_core::unsync::spawn_blocking({ + let cache = self.cache.clone(); + let name = name.to_string(); + move || cache.load_package_info(&name) + }) + .await + .unwrap(); + match result { Ok(value) => value, Err(err) => { if cfg!(debug_assertions) { @@ -214,149 +206,6 @@ impl CliNpmRegistryApiInner { } } - async fn load_file_cached_package_info_result( - &self, - name: &str, - ) -> Result<Option<NpmPackageInfo>, AnyError> { - let file_cache_path = self.get_package_file_cache_path(name); - let deserialization_result = deno_core::unsync::spawn_blocking(|| { - let file_text = match fs::read_to_string(file_cache_path) { - Ok(file_text) => file_text, - Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), - Err(err) => return Err(err.into()), - }; - serde_json::from_str(&file_text) - .map(Some) - .map_err(AnyError::from) - }) - .await - .unwrap(); - match deserialization_result { - Ok(maybe_package_info) => Ok(maybe_package_info), - Err(err) => { - // This scenario might mean we need to load more data from the - // npm registry than before. So, just debug log while in debug - // rather than panic. - log::debug!( - "error deserializing registry.json for '{}'. Reloading. {:?}", - name, - err - ); - Ok(None) - } - } - } - - fn save_package_info_to_file_cache( - &self, - name: &str, - package_info: &NpmPackageInfo, - ) { - if let Err(err) = - self.save_package_info_to_file_cache_result(name, package_info) - { - if cfg!(debug_assertions) { - panic!("error saving cached npm package info for {name}: {err:#}"); - } - } - } - - fn save_package_info_to_file_cache_result( - &self, - name: &str, - package_info: &NpmPackageInfo, - ) -> Result<(), AnyError> { - let file_cache_path = self.get_package_file_cache_path(name); - let file_text = serde_json::to_string(&package_info)?; - atomic_write_file(&file_cache_path, file_text, CACHE_PERM)?; - Ok(()) - } - - async fn load_package_info_from_registry( - &self, - name: &str, - ) -> Result<Option<NpmPackageInfo>, AnyError> { - let registry_url = self.npmrc.get_registry_url(name); - let registry_config = self.npmrc.get_registry_config(name); - - self - .load_package_info_from_registry_inner( - name, - registry_url, - registry_config, - ) - .await - .with_context(|| { - format!( - "Error getting response at {} for package \"{}\"", - self.get_package_url(name, registry_url), - name - ) - }) - } - - async fn load_package_info_from_registry_inner( - &self, - name: &str, - registry_url: &Url, - registry_config: &RegistryConfig, - ) -> Result<Option<NpmPackageInfo>, AnyError> { - if *self.cache.cache_setting() == CacheSetting::Only { - return Err(custom_error( - "NotCached", - format!( - "An npm specifier not found in cache: \"{name}\", --cached-only is specified." - ) - )); - } - - let package_url = self.get_package_url(name, registry_url); - let guard = self.progress_bar.update(package_url.as_str()); - - let maybe_auth_header = maybe_auth_header_for_npm_registry(registry_config); - - let maybe_bytes = self - .http_client - .download_with_progress(package_url, maybe_auth_header, &guard) - .await?; - match maybe_bytes { - Some(bytes) => { - let package_info = deno_core::unsync::spawn_blocking(move || { - serde_json::from_slice(&bytes) - }) - .await??; - self.save_package_info_to_file_cache(name, &package_info); - Ok(Some(package_info)) - } - None => Ok(None), - } - } - - fn get_package_url(&self, name: &str, registry_url: &Url) -> Url { - // list of all characters used in npm packages: - // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ - const ASCII_SET: percent_encoding::AsciiSet = - percent_encoding::NON_ALPHANUMERIC - .remove(b'!') - .remove(b'\'') - .remove(b'(') - .remove(b')') - .remove(b'*') - .remove(b'-') - .remove(b'.') - .remove(b'/') - .remove(b'@') - .remove(b'_') - .remove(b'~'); - let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); - registry_url.join(&name.to_string()).unwrap() - } - - fn get_package_file_cache_path(&self, name: &str) -> PathBuf { - let name_folder_path = self.cache.package_name_folder(name); - name_folder_path.join("registry.json") - } - fn clear_memory_cache(&self) { self.mem_cache.lock().clear(); } diff --git a/cli/npm/managed/resolvers/common.rs b/cli/npm/managed/resolvers/common.rs index b010bdd7c..2d540accd 100644 --- a/cli/npm/managed/resolvers/common.rs +++ b/cli/npm/managed/resolvers/common.rs @@ -12,7 +12,7 @@ use deno_ast::ModuleSpecifier; use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_core::futures; -use deno_core::unsync::spawn; +use deno_core::futures::StreamExt; use deno_core::url::Url; use deno_npm::NpmPackageCacheFolderId; use deno_npm::NpmPackageId; @@ -21,7 +21,8 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; -use super::super::cache::NpmCache; +use crate::http_util::HttpClient; +use crate::npm::managed::cache::TarballCache; /// Part of the resolution that interacts with the file system. #[async_trait] @@ -49,7 +50,10 @@ pub trait NpmPackageFsResolver: Send + Sync { specifier: &ModuleSpecifier, ) -> Result<Option<NpmPackageCacheFolderId>, AnyError>; - async fn cache_packages(&self) -> Result<(), AnyError>; + async fn cache_packages( + &self, + http_client: &Arc<HttpClient>, + ) -> Result<(), AnyError>; fn ensure_read_permission( &self, @@ -126,20 +130,20 @@ impl RegistryReadPermissionChecker { /// Caches all the packages in parallel. pub async fn cache_packages( packages: Vec<NpmResolutionPackage>, - cache: &Arc<NpmCache>, + tarball_cache: &Arc<TarballCache>, + http_client: &Arc<HttpClient>, ) -> Result<(), AnyError> { - let mut handles = Vec::with_capacity(packages.len()); + let mut futures_unordered = futures::stream::FuturesUnordered::new(); for package in packages { - let cache = cache.clone(); - let handle = spawn(async move { - cache.ensure_package(&package.id.nv, &package.dist).await + futures_unordered.push(async move { + tarball_cache + .ensure_package(&package.id.nv, &package.dist, http_client) + .await }); - handles.push(handle); } - let results = futures::future::join_all(handles).await; - for result in results { + while let Some(result) = futures_unordered.next().await { // surface the first error - result??; + result?; } Ok(()) } diff --git a/cli/npm/managed/resolvers/global.rs b/cli/npm/managed/resolvers/global.rs index cfc57e591..4ffcb251f 100644 --- a/cli/npm/managed/resolvers/global.rs +++ b/cli/npm/managed/resolvers/global.rs @@ -20,8 +20,11 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; +use crate::http_util::HttpClient; + use super::super::super::common::types_package_name; use super::super::cache::NpmCache; +use super::super::cache::TarballCache; use super::super::resolution::NpmResolution; use super::common::cache_packages; use super::common::NpmPackageFsResolver; @@ -31,6 +34,7 @@ use super::common::RegistryReadPermissionChecker; #[derive(Debug)] pub struct GlobalNpmPackageResolver { cache: Arc<NpmCache>, + tarball_cache: Arc<TarballCache>, resolution: Arc<NpmResolution>, system_info: NpmSystemInfo, registry_read_permission_checker: RegistryReadPermissionChecker, @@ -38,19 +42,21 @@ pub struct GlobalNpmPackageResolver { impl GlobalNpmPackageResolver { pub fn new( - fs: Arc<dyn FileSystem>, cache: Arc<NpmCache>, + fs: Arc<dyn FileSystem>, + tarball_cache: Arc<TarballCache>, resolution: Arc<NpmResolution>, system_info: NpmSystemInfo, ) -> Self { Self { - cache: cache.clone(), - resolution, - system_info, registry_read_permission_checker: RegistryReadPermissionChecker::new( fs, cache.root_folder(), ), + cache, + tarball_cache, + resolution, + system_info, } } @@ -123,12 +129,20 @@ impl NpmPackageFsResolver for GlobalNpmPackageResolver { ) } - async fn cache_packages(&self) -> Result<(), AnyError> { + async fn cache_packages( + &self, + http_client: &Arc<HttpClient>, + ) -> Result<(), AnyError> { let package_partitions = self .resolution .all_system_packages_partitioned(&self.system_info); - cache_packages(package_partitions.packages, &self.cache).await?; + cache_packages( + package_partitions.packages, + &self.tarball_cache, + http_client, + ) + .await?; // create the copy package folders for copy in package_partitions.copy_packages { diff --git a/cli/npm/managed/resolvers/local.rs b/cli/npm/managed/resolvers/local.rs index f0c2a3f65..5c3b1f15e 100644 --- a/cli/npm/managed/resolvers/local.rs +++ b/cli/npm/managed/resolvers/local.rs @@ -14,6 +14,7 @@ use std::path::PathBuf; use std::sync::Arc; use crate::cache::CACHE_PERM; +use crate::http_util::HttpClient; use crate::npm::cache_dir::mixed_case_package_name_decode; use crate::util::fs::atomic_write_file; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; @@ -27,16 +28,15 @@ use deno_ast::ModuleSpecifier; use deno_core::anyhow::bail; use deno_core::anyhow::Context; use deno_core::error::AnyError; +use deno_core::futures::stream::FuturesUnordered; +use deno_core::futures::StreamExt; use deno_core::parking_lot::Mutex; -use deno_core::unsync::spawn; -use deno_core::unsync::JoinHandle; use deno_core::url::Url; use deno_npm::resolution::NpmResolutionSnapshot; use deno_npm::NpmPackageCacheFolderId; use deno_npm::NpmPackageId; use deno_npm::NpmResolutionPackage; use deno_npm::NpmSystemInfo; -use deno_runtime::deno_core::futures; use deno_runtime::deno_fs; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; @@ -48,6 +48,7 @@ use crate::npm::cache_dir::mixed_case_package_name_encode; use super::super::super::common::types_package_name; use super::super::cache::NpmCache; +use super::super::cache::TarballCache; use super::super::resolution::NpmResolution; use super::common::NpmPackageFsResolver; use super::common::RegistryReadPermissionChecker; @@ -56,10 +57,11 @@ use super::common::RegistryReadPermissionChecker; /// and resolves packages from it. #[derive(Debug)] pub struct LocalNpmPackageResolver { - fs: Arc<dyn deno_fs::FileSystem>, cache: Arc<NpmCache>, + fs: Arc<dyn deno_fs::FileSystem>, progress_bar: ProgressBar, resolution: Arc<NpmResolution>, + tarball_cache: Arc<TarballCache>, root_node_modules_path: PathBuf, root_node_modules_url: Url, system_info: NpmSystemInfo, @@ -68,26 +70,28 @@ pub struct LocalNpmPackageResolver { impl LocalNpmPackageResolver { pub fn new( - fs: Arc<dyn deno_fs::FileSystem>, cache: Arc<NpmCache>, + fs: Arc<dyn deno_fs::FileSystem>, progress_bar: ProgressBar, - node_modules_folder: PathBuf, resolution: Arc<NpmResolution>, + tarball_cache: Arc<TarballCache>, + node_modules_folder: PathBuf, system_info: NpmSystemInfo, ) -> Self { Self { - fs: fs.clone(), cache, + fs: fs.clone(), progress_bar, resolution, - root_node_modules_url: Url::from_directory_path(&node_modules_folder) - .unwrap(), - root_node_modules_path: node_modules_folder.clone(), - system_info, + tarball_cache, registry_read_permission_checker: RegistryReadPermissionChecker::new( fs, - node_modules_folder, + node_modules_folder.clone(), ), + root_node_modules_url: Url::from_directory_path(&node_modules_folder) + .unwrap(), + root_node_modules_path: node_modules_folder, + system_info, } } @@ -225,11 +229,16 @@ impl NpmPackageFsResolver for LocalNpmPackageResolver { Ok(get_package_folder_id_from_folder_name(&folder_name)) } - async fn cache_packages(&self) -> Result<(), AnyError> { + async fn cache_packages( + &self, + http_client: &Arc<HttpClient>, + ) -> Result<(), AnyError> { sync_resolution_with_fs( &self.resolution.snapshot(), &self.cache, + http_client, &self.progress_bar, + &self.tarball_cache, &self.root_node_modules_path, &self.system_info, ) @@ -251,7 +260,9 @@ impl NpmPackageFsResolver for LocalNpmPackageResolver { async fn sync_resolution_with_fs( snapshot: &NpmResolutionSnapshot, cache: &Arc<NpmCache>, + http_client: &Arc<HttpClient>, progress_bar: &ProgressBar, + tarball_cache: &Arc<TarballCache>, root_node_modules_dir_path: &Path, system_info: &NpmSystemInfo, ) -> Result<(), AnyError> { @@ -288,8 +299,7 @@ async fn sync_resolution_with_fs( // node_modules/.deno/<package_folder_id_folder_name>/node_modules/<package_name> let package_partitions = snapshot.all_system_packages_partitioned(system_info); - let mut handles: Vec<JoinHandle<Result<(), AnyError>>> = - Vec::with_capacity(package_partitions.packages.len()); + let mut cache_futures = FuturesUnordered::new(); let mut newest_packages_by_name: HashMap<&String, &NpmResolutionPackage> = HashMap::with_capacity(package_partitions.packages.len()); let bin_entries = Arc::new(Mutex::new(bin_entries::BinEntries::new())); @@ -317,21 +327,19 @@ async fn sync_resolution_with_fs( // are forced to be recreated setup_cache.remove_dep(&package_folder_name); - let pb = progress_bar.clone(); - let cache = cache.clone(); - let package = package.clone(); let bin_entries_to_setup = bin_entries.clone(); - let handle = spawn(async move { - cache.ensure_package(&package.id.nv, &package.dist).await?; - let pb_guard = pb.update_with_prompt( + cache_futures.push(async move { + tarball_cache + .ensure_package(&package.id.nv, &package.dist, http_client) + .await?; + let pb_guard = progress_bar.update_with_prompt( ProgressMessagePrompt::Initialize, &package.id.nv.to_string(), ); let sub_node_modules = folder_path.join("node_modules"); let package_path = join_package_name(&sub_node_modules, &package.id.nv.name); - let cache_folder = - cache.package_folder_for_name_and_version(&package.id.nv); + let cache_folder = cache.package_folder_for_nv(&package.id.nv); deno_core::unsync::spawn_blocking({ let package_path = package_path.clone(); @@ -353,15 +361,13 @@ async fn sync_resolution_with_fs( // finally stop showing the progress bar drop(pb_guard); // explicit for clarity - Ok(()) + Ok::<_, AnyError>(()) }); - handles.push(handle); } } - let results = futures::future::join_all(handles).await; - for result in results { - result??; // surface the first error + while let Some(result) = cache_futures.next().await { + result?; // surface the first error } // 2. Create any "copy" packages, which are used for peer dependencies diff --git a/cli/npm/managed/resolvers/mod.rs b/cli/npm/managed/resolvers/mod.rs index d5472344a..5f0343805 100644 --- a/cli/npm/managed/resolvers/mod.rs +++ b/cli/npm/managed/resolvers/mod.rs @@ -7,6 +7,7 @@ mod local; use std::path::PathBuf; use std::sync::Arc; +use deno_npm::npm_rc::ResolvedNpmRc; use deno_npm::NpmSystemInfo; use deno_runtime::deno_fs::FileSystem; @@ -18,28 +19,38 @@ use self::global::GlobalNpmPackageResolver; use self::local::LocalNpmPackageResolver; use super::cache::NpmCache; +use super::cache::TarballCache; use super::resolution::NpmResolution; pub fn create_npm_fs_resolver( fs: Arc<dyn FileSystem>, - cache: Arc<NpmCache>, + npm_cache: Arc<NpmCache>, + npm_rc: Arc<ResolvedNpmRc>, progress_bar: &ProgressBar, resolution: Arc<NpmResolution>, maybe_node_modules_path: Option<PathBuf>, system_info: NpmSystemInfo, ) -> Arc<dyn NpmPackageFsResolver> { + let tarball_cache = Arc::new(TarballCache::new( + npm_cache.clone(), + fs.clone(), + npm_rc, + progress_bar.clone(), + )); match maybe_node_modules_path { Some(node_modules_folder) => Arc::new(LocalNpmPackageResolver::new( + npm_cache, fs, - cache, progress_bar.clone(), - node_modules_folder, resolution, + tarball_cache, + node_modules_folder, system_info, )), None => Arc::new(GlobalNpmPackageResolver::new( + npm_cache, fs, - cache, + tarball_cache, resolution, system_info, )), |