diff options
Diffstat (limited to 'cli/npm/managed')
-rw-r--r-- | cli/npm/managed/cache.rs | 278 | ||||
-rw-r--r-- | cli/npm/managed/installer.rs | 2 | ||||
-rw-r--r-- | cli/npm/managed/mod.rs | 285 | ||||
-rw-r--r-- | cli/npm/managed/registry.rs | 358 | ||||
-rw-r--r-- | cli/npm/managed/resolution.rs | 14 | ||||
-rw-r--r-- | cli/npm/managed/resolvers/common.rs | 2 | ||||
-rw-r--r-- | cli/npm/managed/resolvers/global.rs | 3 | ||||
-rw-r--r-- | cli/npm/managed/resolvers/local.rs | 6 | ||||
-rw-r--r-- | cli/npm/managed/resolvers/mod.rs | 5 | ||||
-rw-r--r-- | cli/npm/managed/tarball.rs | 241 |
10 files changed, 1150 insertions, 44 deletions
diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs new file mode 100644 index 000000000..91d6ec656 --- /dev/null +++ b/cli/npm/managed/cache.rs @@ -0,0 +1,278 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashSet; +use std::fs; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; + +use deno_ast::ModuleSpecifier; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::parking_lot::Mutex; +use deno_core::url::Url; +use deno_npm::registry::NpmPackageVersionDistInfo; +use deno_npm::NpmPackageCacheFolderId; +use deno_runtime::deno_fs; +use deno_semver::package::PackageNv; + +use crate::args::CacheSetting; +use crate::http_util::HttpClient; +use crate::npm::NpmCacheDir; +use crate::util::fs::hard_link_dir_recursive; +use crate::util::progress_bar::ProgressBar; + +use super::tarball::verify_and_extract_tarball; + +/// Stores a single copy of npm packages in a cache. +#[derive(Debug)] +pub struct NpmCache { + cache_dir: NpmCacheDir, + cache_setting: CacheSetting, + fs: Arc<dyn deno_fs::FileSystem>, + http_client: Arc<HttpClient>, + progress_bar: ProgressBar, + /// ensures a package is only downloaded once per run + previously_reloaded_packages: Mutex<HashSet<PackageNv>>, +} + +impl NpmCache { + pub fn new( + cache_dir: NpmCacheDir, + cache_setting: CacheSetting, + fs: Arc<dyn deno_fs::FileSystem>, + http_client: Arc<HttpClient>, + progress_bar: ProgressBar, + ) -> Self { + Self { + cache_dir, + cache_setting, + fs, + http_client, + progress_bar, + previously_reloaded_packages: Default::default(), + } + } + + pub fn cache_setting(&self) -> &CacheSetting { + &self.cache_setting + } + + pub fn root_dir_url(&self) -> &Url { + self.cache_dir.root_dir_url() + } + + /// Checks if the cache should be used for the provided name and version. 
+ /// NOTE: Subsequent calls for the same package will always return `true` + /// to ensure a package is only downloaded once per run of the CLI. This + /// prevents downloads from re-occurring when someone has `--reload` and + imports a dynamic import that imports the same package again for example. + fn should_use_global_cache_for_package(&self, package: &PackageNv) -> bool { + self.cache_setting.should_use_for_npm_package(&package.name) + || !self + .previously_reloaded_packages + .lock() + .insert(package.clone()) + } + + pub async fn ensure_package( + &self, + package: &PackageNv, + dist: &NpmPackageVersionDistInfo, + registry_url: &Url, + ) -> Result<(), AnyError> { + self + .ensure_package_inner(package, dist, registry_url) + .await + .with_context(|| format!("Failed caching npm package '{package}'.")) + } + + async fn ensure_package_inner( + &self, + package: &PackageNv, + dist: &NpmPackageVersionDistInfo, + registry_url: &Url, + ) -> Result<(), AnyError> { + let package_folder = self + .cache_dir + .package_folder_for_name_and_version(package, registry_url); + if self.should_use_global_cache_for_package(package) + && self.fs.exists_sync(&package_folder) + // if this file exists, then the package didn't successfully extract + // the first time, or another process is currently extracting the zip file + && !self.fs.exists_sync(&package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME)) + { + return Ok(()); + } else if self.cache_setting == CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{}\", --cached-only is specified.", + &package.name + ) + ) + ); + } + + if dist.tarball.is_empty() { + bail!("Tarball URL was empty."); + } + + let guard = self.progress_bar.update(&dist.tarball); + let maybe_bytes = self + .http_client + .download_with_progress(&dist.tarball, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + verify_and_extract_tarball(package, &bytes, dist, &package_folder) + } 
+ None => { + bail!("Could not find npm package tarball at: {}", dist.tarball); + } + } + } + + /// Ensures a copy of the package exists in the global cache. + /// + /// This assumes that the original package folder being hard linked + /// from exists before this is called. + pub fn ensure_copy_package( + &self, + folder_id: &NpmPackageCacheFolderId, + registry_url: &Url, + ) -> Result<(), AnyError> { + assert_ne!(folder_id.copy_index, 0); + let package_folder = self + .cache_dir + .package_folder_for_id(folder_id, registry_url); + + if package_folder.exists() + // if this file exists, then the package didn't successfully extract + // the first time, or another process is currently extracting the zip file + && !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists() + && self.cache_setting.should_use_for_npm_package(&folder_id.nv.name) + { + return Ok(()); + } + + let original_package_folder = self + .cache_dir + .package_folder_for_name_and_version(&folder_id.nv, registry_url); + with_folder_sync_lock(&folder_id.nv, &package_folder, || { + hard_link_dir_recursive(&original_package_folder, &package_folder) + })?; + Ok(()) + } + + pub fn package_folder_for_id( + &self, + id: &NpmPackageCacheFolderId, + registry_url: &Url, + ) -> PathBuf { + self.cache_dir.package_folder_for_id(id, registry_url) + } + + pub fn package_folder_for_name_and_version( + &self, + package: &PackageNv, + registry_url: &Url, + ) -> PathBuf { + self + .cache_dir + .package_folder_for_name_and_version(package, registry_url) + } + + pub fn package_name_folder(&self, name: &str, registry_url: &Url) -> PathBuf { + self.cache_dir.package_name_folder(name, registry_url) + } + + pub fn registry_folder(&self, registry_url: &Url) -> PathBuf { + self.cache_dir.registry_folder(registry_url) + } + + pub fn resolve_package_folder_id_from_specifier( + &self, + specifier: &ModuleSpecifier, + registry_url: &Url, + ) -> Option<NpmPackageCacheFolderId> { + self + .cache_dir + 
.resolve_package_folder_id_from_specifier(specifier, registry_url) + } +} + +const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock"; + +pub fn with_folder_sync_lock( + package: &PackageNv, + output_folder: &Path, + action: impl FnOnce() -> Result<(), AnyError>, +) -> Result<(), AnyError> { + fn inner( + output_folder: &Path, + action: impl FnOnce() -> Result<(), AnyError>, + ) -> Result<(), AnyError> { + fs::create_dir_all(output_folder).with_context(|| { + format!("Error creating '{}'.", output_folder.display()) + })?; + + // This sync lock file is a way to ensure that partially created + // npm package directories aren't considered valid. This could maybe + // be a bit smarter in the future to not bother extracting here + // if another process has taken the lock in the past X seconds and + // wait for the other process to finish (it could try to create the + // file with `create_new(true)` then if it exists, check the metadata + // then wait until the other process finishes with a timeout), but + // for now this is good enough. + let sync_lock_path = output_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME); + match fs::OpenOptions::new() + .write(true) + .create(true) + .open(&sync_lock_path) + { + Ok(_) => { + action()?; + // extraction succeeded, so only now delete this file + let _ignore = std::fs::remove_file(&sync_lock_path); + Ok(()) + } + Err(err) => { + bail!( + concat!( + "Error creating package sync lock file at '{}'. 
", + "Maybe try manually deleting this folder.\n\n{:#}", + ), + output_folder.display(), + err + ); + } + } + } + + match inner(output_folder, action) { + Ok(()) => Ok(()), + Err(err) => { + if let Err(remove_err) = fs::remove_dir_all(output_folder) { + if remove_err.kind() != std::io::ErrorKind::NotFound { + bail!( + concat!( + "Failed setting up package cache directory for {}, then ", + "failed cleaning it up.\n\nOriginal error:\n\n{}\n\n", + "Remove error:\n\n{}\n\nPlease manually ", + "delete this folder or you will run into issues using this ", + "package in the future:\n\n{}" + ), + package, + err, + remove_err, + output_folder.display(), + ); + } + } + Err(err) + } + } +} diff --git a/cli/npm/managed/installer.rs b/cli/npm/managed/installer.rs index 21285c3d7..8f3db0531 100644 --- a/cli/npm/managed/installer.rs +++ b/cli/npm/managed/installer.rs @@ -13,7 +13,7 @@ use deno_semver::package::PackageReq; use crate::args::PackageJsonDepsProvider; use crate::util::sync::AtomicFlag; -use super::super::CliNpmRegistryApi; +use super::CliNpmRegistryApi; use super::NpmResolution; #[derive(Debug)] diff --git a/cli/npm/managed/mod.rs b/cli/npm/managed/mod.rs index c5ba3d3af..df9ad59ac 100644 --- a/cli/npm/managed/mod.rs +++ b/cli/npm/managed/mod.rs @@ -6,6 +6,7 @@ use std::path::PathBuf; use std::sync::Arc; use deno_ast::ModuleSpecifier; +use deno_core::anyhow::Context; use deno_core::error::AnyError; use deno_core::parking_lot::Mutex; use deno_core::serde_json; @@ -14,7 +15,7 @@ use deno_graph::NpmPackageReqResolution; use deno_npm::registry::NpmRegistryApi; use deno_npm::resolution::NpmResolutionSnapshot; use deno_npm::resolution::PackageReqNotFoundError; -use deno_npm::resolution::SerializedNpmResolutionSnapshot; +use deno_npm::resolution::ValidSerializedNpmResolutionSnapshot; use deno_npm::NpmPackageId; use deno_npm::NpmResolutionPackage; use deno_npm::NpmSystemInfo; @@ -27,30 +28,213 @@ use deno_semver::npm::NpmPackageReqReference; use 
deno_semver::package::PackageNv; use deno_semver::package::PackageNvReference; use deno_semver::package::PackageReq; -use serde::Deserialize; -use serde::Serialize; use crate::args::Lockfile; +use crate::args::NpmProcessState; +use crate::args::PackageJsonDepsProvider; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; +use crate::util::progress_bar::ProgressBar; + +use self::cache::NpmCache; +use self::installer::PackageJsonDepsInstaller; +use self::registry::CliNpmRegistryApi; +use self::resolution::NpmResolution; +use self::resolvers::create_npm_fs_resolver; +use self::resolvers::NpmPackageFsResolver; -use super::CliNpmRegistryApi; use super::CliNpmResolver; use super::InnerCliNpmResolverRef; +use super::NpmCacheDir; -pub use self::installer::PackageJsonDepsInstaller; -pub use self::resolution::NpmResolution; -pub use self::resolvers::create_npm_fs_resolver; -pub use self::resolvers::NpmPackageFsResolver; - +mod cache; mod installer; +mod registry; mod resolution; mod resolvers; +mod tarball; + +pub enum CliNpmResolverManagedSnapshotOption { + ResolveFromLockfile(Arc<Mutex<Lockfile>>), + Specified(Option<ValidSerializedNpmResolutionSnapshot>), +} + +pub enum CliNpmResolverManagedPackageJsonInstallerOption { + ConditionalInstall(Arc<PackageJsonDepsProvider>), + NoInstall, +} + +pub struct CliNpmResolverManagedCreateOptions { + pub snapshot: CliNpmResolverManagedSnapshotOption, + pub maybe_lockfile: Option<Arc<Mutex<Lockfile>>>, + pub fs: Arc<dyn deno_runtime::deno_fs::FileSystem>, + pub http_client: Arc<crate::http_util::HttpClient>, + pub npm_global_cache_dir: PathBuf, + pub cache_setting: crate::args::CacheSetting, + pub text_only_progress_bar: crate::util::progress_bar::ProgressBar, + pub maybe_node_modules_path: Option<PathBuf>, + pub npm_system_info: NpmSystemInfo, + pub package_json_installer: CliNpmResolverManagedPackageJsonInstallerOption, + pub npm_registry_url: Url, +} + +pub async fn create_managed_npm_resolver_for_lsp( + options: 
CliNpmResolverManagedCreateOptions, +) -> Arc<dyn CliNpmResolver> { + let npm_cache = create_cache(&options); + let npm_api = create_api(&options, npm_cache.clone()); + let snapshot = match resolve_snapshot(&npm_api, options.snapshot).await { + Ok(snapshot) => snapshot, + Err(err) => { + log::warn!("failed to resolve snapshot: {}", err); + None + } + }; + create_inner( + npm_cache, + npm_api, + snapshot, + options.maybe_lockfile, + options.fs, + options.text_only_progress_bar, + options.maybe_node_modules_path, + options.package_json_installer, + options.npm_registry_url, + options.npm_system_info, + ) +} -/// State provided to the process via an environment variable. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct NpmProcessState { - pub snapshot: SerializedNpmResolutionSnapshot, - pub local_node_modules_path: Option<String>, +pub async fn create_managed_npm_resolver( + options: CliNpmResolverManagedCreateOptions, +) -> Result<Arc<dyn CliNpmResolver>, AnyError> { + let npm_cache = create_cache(&options); + let npm_api = create_api(&options, npm_cache.clone()); + let snapshot = resolve_snapshot(&npm_api, options.snapshot).await?; + Ok(create_inner( + npm_cache, + npm_api, + snapshot, + options.maybe_lockfile, + options.fs, + options.text_only_progress_bar, + options.maybe_node_modules_path, + options.package_json_installer, + options.npm_registry_url, + options.npm_system_info, + )) +} + +#[allow(clippy::too_many_arguments)] +fn create_inner( + npm_cache: Arc<NpmCache>, + npm_api: Arc<CliNpmRegistryApi>, + snapshot: Option<ValidSerializedNpmResolutionSnapshot>, + maybe_lockfile: Option<Arc<Mutex<Lockfile>>>, + fs: Arc<dyn deno_runtime::deno_fs::FileSystem>, + text_only_progress_bar: crate::util::progress_bar::ProgressBar, + node_modules_dir_path: Option<PathBuf>, + package_json_installer: CliNpmResolverManagedPackageJsonInstallerOption, + npm_registry_url: Url, + npm_system_info: NpmSystemInfo, +) -> Arc<dyn CliNpmResolver> { + let resolution = 
Arc::new(NpmResolution::from_serialized( + npm_api.clone(), + snapshot, + maybe_lockfile.clone(), + )); + let npm_fs_resolver = create_npm_fs_resolver( + fs.clone(), + npm_cache.clone(), + &text_only_progress_bar, + npm_registry_url, + resolution.clone(), + node_modules_dir_path, + npm_system_info.clone(), + ); + let package_json_deps_installer = match package_json_installer { + CliNpmResolverManagedPackageJsonInstallerOption::ConditionalInstall( + provider, + ) => Arc::new(PackageJsonDepsInstaller::new( + provider, + npm_api.clone(), + resolution.clone(), + )), + CliNpmResolverManagedPackageJsonInstallerOption::NoInstall => { + Arc::new(PackageJsonDepsInstaller::no_op()) + } + }; + Arc::new(ManagedCliNpmResolver::new( + npm_api, + fs, + resolution, + npm_fs_resolver, + npm_cache, + maybe_lockfile, + package_json_deps_installer, + text_only_progress_bar, + npm_system_info, + )) +} + +fn create_cache(options: &CliNpmResolverManagedCreateOptions) -> Arc<NpmCache> { + Arc::new(NpmCache::new( + NpmCacheDir::new(options.npm_global_cache_dir.clone()), + options.cache_setting.clone(), + options.fs.clone(), + options.http_client.clone(), + options.text_only_progress_bar.clone(), + )) +} + +fn create_api( + options: &CliNpmResolverManagedCreateOptions, + npm_cache: Arc<NpmCache>, +) -> Arc<CliNpmRegistryApi> { + Arc::new(CliNpmRegistryApi::new( + options.npm_registry_url.clone(), + npm_cache.clone(), + options.http_client.clone(), + options.text_only_progress_bar.clone(), + )) +} + +async fn resolve_snapshot( + api: &CliNpmRegistryApi, + snapshot: CliNpmResolverManagedSnapshotOption, +) -> Result<Option<ValidSerializedNpmResolutionSnapshot>, AnyError> { + match snapshot { + CliNpmResolverManagedSnapshotOption::ResolveFromLockfile(lockfile) => { + if !lockfile.lock().overwrite { + let snapshot = snapshot_from_lockfile(lockfile.clone(), api) + .await + .with_context(|| { + format!( + "failed reading lockfile '{}'", + lockfile.lock().filename.display() + ) + })?; + // clear 
the memory cache to reduce memory usage + api.clear_memory_cache(); + Ok(Some(snapshot)) + } else { + Ok(None) + } + } + CliNpmResolverManagedSnapshotOption::Specified(snapshot) => Ok(snapshot), + } +} + +async fn snapshot_from_lockfile( + lockfile: Arc<Mutex<Lockfile>>, + api: &dyn NpmRegistryApi, +) -> Result<ValidSerializedNpmResolutionSnapshot, AnyError> { + let incomplete_snapshot = { + let lock = lockfile.lock(); + deno_npm::resolution::incomplete_snapshot_from_lockfile(&lock)? + }; + let snapshot = + deno_npm::resolution::snapshot_from_lockfile(incomplete_snapshot, api) + .await?; + Ok(snapshot) } /// An npm resolver where the resolution is managed by Deno rather than @@ -59,40 +243,45 @@ pub struct ManagedCliNpmResolver { api: Arc<CliNpmRegistryApi>, fs: Arc<dyn FileSystem>, fs_resolver: Arc<dyn NpmPackageFsResolver>, + global_npm_cache: Arc<NpmCache>, resolution: Arc<NpmResolution>, maybe_lockfile: Option<Arc<Mutex<Lockfile>>>, + npm_system_info: NpmSystemInfo, + progress_bar: ProgressBar, package_json_deps_installer: Arc<PackageJsonDepsInstaller>, } impl std::fmt::Debug for ManagedCliNpmResolver { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ManagedNpmResolver") - .field("api", &"<omitted>") - .field("fs", &"<omitted>") - .field("fs_resolver", &"<omitted>") - .field("resolution", &"<omitted>") - .field("maybe_lockfile", &"<omitted>") - .field("package_json_deps_installer", &"<omitted>") + .field("<omitted>", &"<omitted>") .finish() } } impl ManagedCliNpmResolver { + #[allow(clippy::too_many_arguments)] pub fn new( api: Arc<CliNpmRegistryApi>, fs: Arc<dyn FileSystem>, resolution: Arc<NpmResolution>, fs_resolver: Arc<dyn NpmPackageFsResolver>, + global_npm_cache: Arc<NpmCache>, maybe_lockfile: Option<Arc<Mutex<Lockfile>>>, package_json_deps_installer: Arc<PackageJsonDepsInstaller>, + progress_bar: ProgressBar, + npm_system_info: NpmSystemInfo, ) -> Self { Self { api, fs, fs_resolver, + global_npm_cache, resolution, 
maybe_lockfile, package_json_deps_installer, + progress_bar, + npm_system_info, } } @@ -191,6 +380,15 @@ impl ManagedCliNpmResolver { self.resolution.snapshot() } + pub fn serialized_valid_snapshot_for_system( + &self, + system_info: &NpmSystemInfo, + ) -> ValidSerializedNpmResolutionSnapshot { + self + .resolution + .serialized_valid_snapshot_for_system(system_info) + } + pub fn lock(&self, lockfile: &mut Lockfile) -> Result<(), AnyError> { self.resolution.lock(lockfile) } @@ -208,8 +406,11 @@ impl ManagedCliNpmResolver { pub async fn resolve_pending(&self) -> Result<(), AnyError> { self.resolution.resolve_pending().await?; - self.fs_resolver.cache_packages().await?; - Ok(()) + self.cache_packages().await + } + + pub async fn cache_packages(&self) -> Result<(), AnyError> { + self.fs_resolver.cache_packages().await } fn resolve_pkg_id_from_pkg_req( @@ -240,6 +441,17 @@ impl ManagedCliNpmResolver { .map(|_| ()) .map_err(|err| err.into()) } + + pub fn registry_base_url(&self) -> &ModuleSpecifier { + self.api.base_url() + } + + pub fn registry_folder_in_global_cache( + &self, + registry_url: &ModuleSpecifier, + ) -> PathBuf { + self.global_npm_cache.registry_folder(registry_url) + } } impl NpmResolver for ManagedCliNpmResolver { @@ -283,6 +495,35 @@ impl CliNpmResolver for ManagedCliNpmResolver { self } + fn clone_snapshotted(&self) -> Arc<dyn CliNpmResolver> { + // create a new snapshotted npm resolution and resolver + let npm_resolution = Arc::new(NpmResolution::new( + self.api.clone(), + self.resolution.snapshot(), + self.maybe_lockfile.clone(), + )); + + Arc::new(ManagedCliNpmResolver::new( + self.api.clone(), + self.fs.clone(), + npm_resolution.clone(), + create_npm_fs_resolver( + self.fs.clone(), + self.global_npm_cache.clone(), + &self.progress_bar, + self.api.base_url().clone(), + npm_resolution, + self.node_modules_path(), + self.npm_system_info.clone(), + ), + self.global_npm_cache.clone(), + self.maybe_lockfile.clone(), + 
self.package_json_deps_installer.clone(), + self.progress_bar.clone(), + self.npm_system_info.clone(), + )) + } + fn root_dir_url(&self) -> &Url { self.fs_resolver.root_dir_url() } diff --git a/cli/npm/managed/registry.rs b/cli/npm/managed/registry.rs new file mode 100644 index 000000000..2466f4713 --- /dev/null +++ b/cli/npm/managed/registry.rs @@ -0,0 +1,358 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashMap; +use std::collections::HashSet; +use std::fs; +use std::io::ErrorKind; +use std::path::PathBuf; +use std::sync::Arc; + +use async_trait::async_trait; +use deno_core::anyhow::anyhow; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; +use deno_core::parking_lot::Mutex; +use deno_core::serde_json; +use deno_core::url::Url; +use deno_npm::registry::NpmPackageInfo; +use deno_npm::registry::NpmRegistryApi; +use deno_npm::registry::NpmRegistryPackageInfoLoadError; + +use crate::args::CacheSetting; +use crate::cache::CACHE_PERM; +use crate::http_util::HttpClient; +use crate::util::fs::atomic_write_file; +use crate::util::progress_bar::ProgressBar; +use crate::util::sync::AtomicFlag; + +use super::cache::NpmCache; + +#[derive(Debug)] +pub struct CliNpmRegistryApi(Option<Arc<CliNpmRegistryApiInner>>); + +impl CliNpmRegistryApi { + pub fn new( + base_url: Url, + cache: Arc<NpmCache>, + http_client: Arc<HttpClient>, + progress_bar: ProgressBar, + ) -> Self { + Self(Some(Arc::new(CliNpmRegistryApiInner { + base_url, + cache, + force_reload_flag: Default::default(), + mem_cache: Default::default(), + previously_reloaded_packages: Default::default(), + http_client, + progress_bar, + }))) + } + + /// Clears the internal memory cache. 
+ pub fn clear_memory_cache(&self) { + self.inner().clear_memory_cache(); + } + + pub fn get_cached_package_info( + &self, + name: &str, + ) -> Option<Arc<NpmPackageInfo>> { + self.inner().get_cached_package_info(name) + } + + pub fn base_url(&self) -> &Url { + &self.inner().base_url + } + + fn inner(&self) -> &Arc<CliNpmRegistryApiInner> { + // this panicking indicates a bug in the code where this + // wasn't initialized + self.0.as_ref().unwrap() + } +} + +#[async_trait] +impl NpmRegistryApi for CliNpmRegistryApi { + async fn package_info( + &self, + name: &str, + ) -> Result<Arc<NpmPackageInfo>, NpmRegistryPackageInfoLoadError> { + match self.inner().maybe_package_info(name).await { + Ok(Some(info)) => Ok(info), + Ok(None) => Err(NpmRegistryPackageInfoLoadError::PackageNotExists { + package_name: name.to_string(), + }), + Err(err) => { + Err(NpmRegistryPackageInfoLoadError::LoadError(Arc::new(err))) + } + } + } + + fn mark_force_reload(&self) -> bool { + // never force reload the registry information if reloading + // is disabled or if we're already reloading + if matches!( + self.inner().cache.cache_setting(), + CacheSetting::Only | CacheSetting::ReloadAll + ) { + return false; + } + if self.inner().force_reload_flag.raise() { + self.clear_memory_cache(); // clear the cache to force reloading + true + } else { + false + } + } +} + +type CacheItemPendingResult = + Result<Option<Arc<NpmPackageInfo>>, Arc<AnyError>>; + +#[derive(Debug)] +enum CacheItem { + Pending(Shared<BoxFuture<'static, CacheItemPendingResult>>), + Resolved(Option<Arc<NpmPackageInfo>>), +} + +#[derive(Debug)] +struct CliNpmRegistryApiInner { + base_url: Url, + cache: Arc<NpmCache>, + force_reload_flag: AtomicFlag, + mem_cache: Mutex<HashMap<String, CacheItem>>, + previously_reloaded_packages: Mutex<HashSet<String>>, + http_client: Arc<HttpClient>, + progress_bar: ProgressBar, +} + +impl CliNpmRegistryApiInner { + pub async fn maybe_package_info( + self: &Arc<Self>, + name: &str, + ) -> 
Result<Option<Arc<NpmPackageInfo>>, AnyError> { + let (created, future) = { + let mut mem_cache = self.mem_cache.lock(); + match mem_cache.get(name) { + Some(CacheItem::Resolved(maybe_info)) => { + return Ok(maybe_info.clone()); + } + Some(CacheItem::Pending(future)) => (false, future.clone()), + None => { + if (self.cache.cache_setting().should_use_for_npm_package(name) && !self.force_reload()) + // if this has been previously reloaded, then try loading from the + // file system cache + || !self.previously_reloaded_packages.lock().insert(name.to_string()) + { + // attempt to load from the file cache + if let Some(info) = self.load_file_cached_package_info(name) { + let result = Some(Arc::new(info)); + mem_cache + .insert(name.to_string(), CacheItem::Resolved(result.clone())); + return Ok(result); + } + } + + let future = { + let api = self.clone(); + let name = name.to_string(); + async move { + api + .load_package_info_from_registry(&name) + .await + .map(|info| info.map(Arc::new)) + .map_err(Arc::new) + } + .boxed() + .shared() + }; + mem_cache + .insert(name.to_string(), CacheItem::Pending(future.clone())); + (true, future) + } + } + }; + + if created { + match future.await { + Ok(maybe_info) => { + // replace the cache item to say it's resolved now + self + .mem_cache + .lock() + .insert(name.to_string(), CacheItem::Resolved(maybe_info.clone())); + Ok(maybe_info) + } + Err(err) => { + // purge the item from the cache so it loads next time + self.mem_cache.lock().remove(name); + Err(anyhow!("{:#}", err)) + } + } + } else { + Ok(future.await.map_err(|err| anyhow!("{:#}", err))?) 
+ } + } + + fn force_reload(&self) -> bool { + self.force_reload_flag.is_raised() + } + + fn load_file_cached_package_info( + &self, + name: &str, + ) -> Option<NpmPackageInfo> { + match self.load_file_cached_package_info_result(name) { + Ok(value) => value, + Err(err) => { + if cfg!(debug_assertions) { + panic!("error loading cached npm package info for {name}: {err:#}"); + } else { + None + } + } + } + } + + fn load_file_cached_package_info_result( + &self, + name: &str, + ) -> Result<Option<NpmPackageInfo>, AnyError> { + let file_cache_path = self.get_package_file_cache_path(name); + let file_text = match fs::read_to_string(file_cache_path) { + Ok(file_text) => file_text, + Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err.into()), + }; + match serde_json::from_str(&file_text) { + Ok(package_info) => Ok(Some(package_info)), + Err(err) => { + // This scenario might mean we need to load more data from the + // npm registry than before. So, just debug log while in debug + // rather than panic. + log::debug!( + "error deserializing registry.json for '{}'. Reloading. 
{:?}", + name, + err + ); + Ok(None) + } + } + } + + fn save_package_info_to_file_cache( + &self, + name: &str, + package_info: &NpmPackageInfo, + ) { + if let Err(err) = + self.save_package_info_to_file_cache_result(name, package_info) + { + if cfg!(debug_assertions) { + panic!("error saving cached npm package info for {name}: {err:#}"); + } + } + } + + fn save_package_info_to_file_cache_result( + &self, + name: &str, + package_info: &NpmPackageInfo, + ) -> Result<(), AnyError> { + let file_cache_path = self.get_package_file_cache_path(name); + let file_text = serde_json::to_string(&package_info)?; + atomic_write_file(&file_cache_path, file_text, CACHE_PERM)?; + Ok(()) + } + + async fn load_package_info_from_registry( + &self, + name: &str, + ) -> Result<Option<NpmPackageInfo>, AnyError> { + self + .load_package_info_from_registry_inner(name) + .await + .with_context(|| { + format!( + "Error getting response at {} for package \"{}\"", + self.get_package_url(name), + name + ) + }) + } + + async fn load_package_info_from_registry_inner( + &self, + name: &str, + ) -> Result<Option<NpmPackageInfo>, AnyError> { + if *self.cache.cache_setting() == CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{name}\", --cached-only is specified." 
+ ) + )); + } + + let package_url = self.get_package_url(name); + let guard = self.progress_bar.update(package_url.as_str()); + + let maybe_bytes = self + .http_client + .download_with_progress(package_url, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + let package_info = serde_json::from_slice(&bytes)?; + self.save_package_info_to_file_cache(name, &package_info); + Ok(Some(package_info)) + } + None => Ok(None), + } + } + + fn get_package_url(&self, name: &str) -> Url { + // list of all characters used in npm packages: + // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ + const ASCII_SET: percent_encoding::AsciiSet = + percent_encoding::NON_ALPHANUMERIC + .remove(b'!') + .remove(b'\'') + .remove(b'(') + .remove(b')') + .remove(b'*') + .remove(b'-') + .remove(b'.') + .remove(b'/') + .remove(b'@') + .remove(b'_') + .remove(b'~'); + let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); + self.base_url.join(&name.to_string()).unwrap() + } + + fn get_package_file_cache_path(&self, name: &str) -> PathBuf { + let name_folder_path = self.cache.package_name_folder(name, &self.base_url); + name_folder_path.join("registry.json") + } + + fn clear_memory_cache(&self) { + self.mem_cache.lock().clear(); + } + + pub fn get_cached_package_info( + &self, + name: &str, + ) -> Option<Arc<NpmPackageInfo>> { + let mem_cache = self.mem_cache.lock(); + if let Some(CacheItem::Resolved(maybe_info)) = mem_cache.get(name) { + maybe_info.clone() + } else { + None + } + } +} diff --git a/cli/npm/managed/resolution.rs b/cli/npm/managed/resolution.rs index 05c1227a7..f05275f3c 100644 --- a/cli/npm/managed/resolution.rs +++ b/cli/npm/managed/resolution.rs @@ -34,7 +34,7 @@ use deno_semver::VersionReq; use crate::args::Lockfile; use crate::util::sync::TaskQueue; -use super::super::registry::CliNpmRegistryApi; +use super::CliNpmRegistryApi; /// Handles updating and storing npm resolution in memory where the underlying /// snapshot can be updated concurrently. 
Additionally handles updating the lockfile @@ -221,8 +221,6 @@ impl NpmResolution { .map(|pkg| pkg.id.clone()) } - // todo: NEXT - /// Resolves a package requirement for deno graph. This should only be /// called by deno_graph's NpmResolver or for resolving packages in /// a package.json @@ -275,14 +273,6 @@ impl NpmResolution { .all_system_packages_partitioned(system_info) } - // todo: NEXT - - pub fn has_packages(&self) -> bool { - !self.snapshot.read().is_empty() - } - - // todo: NEXT - pub fn snapshot(&self) -> NpmResolutionSnapshot { self.snapshot.read().clone() } @@ -293,8 +283,6 @@ impl NpmResolution { self.snapshot.read().as_valid_serialized() } - // todo: NEXT - pub fn serialized_valid_snapshot_for_system( &self, system_info: &NpmSystemInfo, diff --git a/cli/npm/managed/resolvers/common.rs b/cli/npm/managed/resolvers/common.rs index 4076579bf..b0f375779 100644 --- a/cli/npm/managed/resolvers/common.rs +++ b/cli/npm/managed/resolvers/common.rs @@ -20,7 +20,7 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; -use crate::npm::NpmCache; +use super::super::cache::NpmCache; /// Part of the resolution that interacts with the file system. 
#[async_trait] diff --git a/cli/npm/managed/resolvers/global.rs b/cli/npm/managed/resolvers/global.rs index 25db62f73..3f042a38b 100644 --- a/cli/npm/managed/resolvers/global.rs +++ b/cli/npm/managed/resolvers/global.rs @@ -20,8 +20,7 @@ use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::NodePermissions; use deno_runtime::deno_node::NodeResolutionMode; -use crate::npm::NpmCache; - +use super::super::cache::NpmCache; use super::super::resolution::NpmResolution; use super::common::cache_packages; use super::common::types_package_name; diff --git a/cli/npm/managed/resolvers/local.rs b/cli/npm/managed/resolvers/local.rs index 57170eccd..8e4d72f26 100644 --- a/cli/npm/managed/resolvers/local.rs +++ b/cli/npm/managed/resolvers/local.rs @@ -12,7 +12,7 @@ use std::path::PathBuf; use std::sync::Arc; use crate::cache::CACHE_PERM; -use crate::npm::cache::mixed_case_package_name_decode; +use crate::npm::cache_dir::mixed_case_package_name_decode; use crate::util::fs::atomic_write_file; use crate::util::fs::canonicalize_path_maybe_not_exists_with_fs; use crate::util::fs::symlink_dir; @@ -41,11 +41,11 @@ use deno_semver::package::PackageNv; use serde::Deserialize; use serde::Serialize; -use crate::npm::cache::mixed_case_package_name_encode; -use crate::npm::NpmCache; +use crate::npm::cache_dir::mixed_case_package_name_encode; use crate::util::fs::copy_dir_recursive; use crate::util::fs::hard_link_dir_recursive; +use super::super::cache::NpmCache; use super::super::resolution::NpmResolution; use super::common::types_package_name; use super::common::NpmPackageFsResolver; diff --git a/cli/npm/managed/resolvers/mod.rs b/cli/npm/managed/resolvers/mod.rs index b6d96c4af..5fc140f26 100644 --- a/cli/npm/managed/resolvers/mod.rs +++ b/cli/npm/managed/resolvers/mod.rs @@ -11,14 +11,15 @@ use deno_core::url::Url; use deno_npm::NpmSystemInfo; use deno_runtime::deno_fs::FileSystem; -use crate::npm::NpmCache; use crate::util::progress_bar::ProgressBar; pub use 
self::common::NpmPackageFsResolver; + use self::global::GlobalNpmPackageResolver; use self::local::LocalNpmPackageResolver; -use super::NpmResolution; +use super::cache::NpmCache; +use super::resolution::NpmResolution; pub fn create_npm_fs_resolver( fs: Arc<dyn FileSystem>, diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs new file mode 100644 index 000000000..e72b1afc8 --- /dev/null +++ b/cli/npm/managed/tarball.rs @@ -0,0 +1,241 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashSet; +use std::fs; +use std::path::Path; +use std::path::PathBuf; + +use deno_core::anyhow::bail; +use deno_core::error::AnyError; +use deno_npm::registry::NpmPackageVersionDistInfo; +use deno_npm::registry::NpmPackageVersionDistInfoIntegrity; +use deno_semver::package::PackageNv; +use flate2::read::GzDecoder; +use tar::Archive; +use tar::EntryType; + +use super::cache::with_folder_sync_lock; + +pub fn verify_and_extract_tarball( + package: &PackageNv, + data: &[u8], + dist_info: &NpmPackageVersionDistInfo, + output_folder: &Path, +) -> Result<(), AnyError> { + verify_tarball_integrity(package, data, &dist_info.integrity())?; + + with_folder_sync_lock(package, output_folder, || { + extract_tarball(data, output_folder) + }) +} + +fn verify_tarball_integrity( + package: &PackageNv, + data: &[u8], + npm_integrity: &NpmPackageVersionDistInfoIntegrity, +) -> Result<(), AnyError> { + use ring::digest::Context; + let (tarball_checksum, expected_checksum) = match npm_integrity { + NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm, + base64_hash, + } => { + let algo = match *algorithm { + "sha512" => &ring::digest::SHA512, + "sha1" => &ring::digest::SHA1_FOR_LEGACY_USE_ONLY, + hash_kind => bail!( + "Not implemented hash function for {}: {}", + package, + hash_kind + ), + }; + let mut hash_ctx = Context::new(algo); + hash_ctx.update(data); + let digest = hash_ctx.finish(); + let tarball_checksum = 
base64::encode(digest.as_ref()).to_lowercase(); + (tarball_checksum, base64_hash.to_lowercase()) + } + NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(hex) => { + let mut hash_ctx = Context::new(&ring::digest::SHA1_FOR_LEGACY_USE_ONLY); + hash_ctx.update(data); + let digest = hash_ctx.finish(); + let tarball_checksum = hex::encode(digest.as_ref()).to_lowercase(); + (tarball_checksum, hex.to_lowercase()) + } + NpmPackageVersionDistInfoIntegrity::UnknownIntegrity(integrity) => { + bail!( + "Not implemented integrity kind for {}: {}", + package, + integrity + ) + } + }; + + if tarball_checksum != expected_checksum { + bail!( + "Tarball checksum did not match what was provided by npm registry for {}.\n\nExpected: {}\nActual: {}", + package, + expected_checksum, + tarball_checksum, + ) + } + Ok(()) +} + +fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> { + fs::create_dir_all(output_folder)?; + let output_folder = fs::canonicalize(output_folder)?; + let tar = GzDecoder::new(data); + let mut archive = Archive::new(tar); + archive.set_overwrite(true); + archive.set_preserve_permissions(true); + let mut created_dirs = HashSet::new(); + + for entry in archive.entries()? { + let mut entry = entry?; + let path = entry.path()?; + let entry_type = entry.header().entry_type(); + + // Some package tarballs contain "pax_global_header", these entries + // should be skipped. 
+ if entry_type == EntryType::XGlobalHeader { + continue; + } + + // skip the first component which will be either "package" or the name of the package + let relative_path = path.components().skip(1).collect::<PathBuf>(); + let absolute_path = output_folder.join(relative_path); + let dir_path = if entry_type == EntryType::Directory { + absolute_path.as_path() + } else { + absolute_path.parent().unwrap() + }; + if created_dirs.insert(dir_path.to_path_buf()) { + fs::create_dir_all(dir_path)?; + let canonicalized_dir = fs::canonicalize(dir_path)?; + if !canonicalized_dir.starts_with(&output_folder) { + bail!( + "Extracted directory '{}' of npm tarball was not in output directory.", + canonicalized_dir.display() + ) + } + } + + let entry_type = entry.header().entry_type(); + match entry_type { + EntryType::Regular => { + entry.unpack(&absolute_path)?; + } + EntryType::Symlink | EntryType::Link => { + // At the moment, npm doesn't seem to support uploading hardlinks or + // symlinks to the npm registry. If ever adding symlink or hardlink + // support, we will need to validate that the hardlink and symlink + // target are within the package directory. 
+ log::warn!( + "Ignoring npm tarball entry type {:?} for '{}'", + entry_type, + absolute_path.display() + ) + } + _ => { + // ignore + } + } + } + Ok(()) +} + +#[cfg(test)] +mod test { + use deno_semver::Version; + + use super::*; + + #[test] + pub fn test_verify_tarball() { + let package = PackageNv { + name: "package".to_string(), + version: Version::parse_from_npm("1.0.0").unwrap(), + }; + let actual_checksum = + "z4phnx7vul3xvchq1m2ab9yg5aulvxxcg/spidns6c5h0ne8xyxysp+dgnkhfuwvy7kxvudbeoglodj6+sfapg=="; + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::UnknownIntegrity("test") + ) + .unwrap_err() + .to_string(), + "Not implemented integrity kind for package@1.0.0: test", + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "notimplemented", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + "Not implemented hash function for package@1.0.0: notimplemented", + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha1", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + concat!( + "Tarball checksum did not match what was provided by npm ", + "registry for package@1.0.0.\n\nExpected: test\nActual: 2jmj7l5rsw0yvb/vlwaykk/ybwk=", + ), + ); + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha512", + base64_hash: "test" + } + ) + .unwrap_err() + .to_string(), + format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_checksum}"), + ); + assert!(verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::Integrity { + algorithm: "sha512", + base64_hash: actual_checksum, + }, + ) + .is_ok()); + let actual_hex = 
"da39a3ee5e6b4b0d3255bfef95601890afd80709"; + assert_eq!( + verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex("test"), + ) + .unwrap_err() + .to_string(), + format!("Tarball checksum did not match what was provided by npm registry for package@1.0.0.\n\nExpected: test\nActual: {actual_hex}"), + ); + assert!(verify_tarball_integrity( + &package, + &Vec::new(), + &NpmPackageVersionDistInfoIntegrity::LegacySha1Hex(actual_hex), + ) + .is_ok()); + } +} |