diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2024-06-02 21:39:13 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-03 01:39:13 +0000 |
commit | b1f776adef6f0d0caa0b2badf9fb707cf5efa6e7 (patch) | |
tree | df801e53bb5e43268933d883f049546256ef8e7f /cli/npm/managed/cache/registry_info.rs | |
parent | eda43c46de12ed589fdbe62ba0574887cfbb3574 (diff) |
refactor: extract structs for downloading tarballs and npm registry packuments (#24067)
Diffstat (limited to 'cli/npm/managed/cache/registry_info.rs')
-rw-r--r-- | cli/npm/managed/cache/registry_info.rs | 284 |
1 files changed, 284 insertions, 0 deletions
diff --git a/cli/npm/managed/cache/registry_info.rs b/cli/npm/managed/cache/registry_info.rs new file mode 100644 index 000000000..ea6b47969 --- /dev/null +++ b/cli/npm/managed/cache/registry_info.rs @@ -0,0 +1,284 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashMap; +use std::sync::Arc; + +use deno_core::anyhow::anyhow; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; +use deno_core::parking_lot::Mutex; +use deno_core::serde_json; +use deno_core::url::Url; +use deno_npm::npm_rc::RegistryConfig; +use deno_npm::npm_rc::ResolvedNpmRc; +use deno_npm::registry::NpmPackageInfo; + +use crate::args::CacheSetting; +use crate::http_util::HttpClient; +use crate::npm::common::maybe_auth_header_for_npm_registry; +use crate::util::progress_bar::ProgressBar; + +use super::NpmCache; + +// todo(dsherret): create seams and unit test this + +#[derive(Debug, Clone)] +enum MemoryCacheItem { + /// The cache item hasn't loaded yet. + PendingFuture(Shared<PendingRegistryLoadFuture>), + /// The item has loaded in the past and was stored in the file system cache. + /// There is no reason to request this package from the npm registry again + /// for the duration of execution. + FsCached, + /// An item is memory cached when it fails saving to the file system cache + /// or the package does not exist. + MemoryCached(Result<Option<Arc<NpmPackageInfo>>, Arc<AnyError>>), +} + +#[derive(Debug, Clone)] +enum FutureResult { + PackageNotExists, + SavedFsCache(Arc<NpmPackageInfo>), + ErroredFsCache(Arc<NpmPackageInfo>), +} + +type PendingRegistryLoadFuture = + BoxFuture<'static, Result<FutureResult, Arc<AnyError>>>; + +/// Downloads packuments from the npm registry. +/// +/// This is shared amongst all the workers. +#[derive(Debug)] +pub struct RegistryInfoDownloader { + cache: Arc<NpmCache>, + npmrc: Arc<ResolvedNpmRc>, + progress_bar: ProgressBar, + memory_cache: Mutex<HashMap<String, MemoryCacheItem>>, +} + +impl RegistryInfoDownloader { + pub fn new( + cache: Arc<NpmCache>, + npmrc: Arc<ResolvedNpmRc>, + progress_bar: ProgressBar, + ) -> Self { + Self { + cache, + npmrc, + progress_bar, + memory_cache: Default::default(), + } + } + + pub async fn load_package_info( + &self, + name: &str, + current_runtime_http_client: &Arc<HttpClient>, + ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { + let registry_url = self.npmrc.get_registry_url(name); + let registry_config = self.npmrc.get_registry_config(name); + + self + .load_package_info_inner( + name, + registry_url, + registry_config, + current_runtime_http_client, + ) + .await + .with_context(|| { + format!( + "Error getting response at {} for package \"{}\"", + self.get_package_url(name, registry_url), + name + ) + }) + } + + async fn load_package_info_inner( + &self, + name: &str, + registry_url: &Url, + registry_config: &RegistryConfig, + current_runtime_http_client: &Arc<HttpClient>, + ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { + if *self.cache.cache_setting() == CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{name}\", --cached-only is specified." + ) + )); + } + + let (created, cache_item) = { + let mut mem_cache = self.memory_cache.lock(); + if let Some(cache_item) = mem_cache.get(name) { + (false, cache_item.clone()) + } else { + let future = self.create_load_future( + name, + registry_url, + registry_config, + current_runtime_http_client, + ); + let cache_item = MemoryCacheItem::PendingFuture(future); + mem_cache.insert(name.to_string(), cache_item.clone()); + (true, cache_item) + } + }; + match cache_item { + MemoryCacheItem::FsCached => { + // this struct previously loaded from the registry, so we can load it from the file system cache + self + .load_file_cached_package_info(name) + .await + .map(|info| Some(Arc::new(info))) + } + MemoryCacheItem::MemoryCached(maybe_info) => { + maybe_info.clone().map_err(|e| anyhow!("{}", e)) + } + MemoryCacheItem::PendingFuture(future) => { + if created { + match future.await { + Ok(FutureResult::SavedFsCache(info)) => { + // return back the future and mark this package as having + // been saved in the cache for next time it's requested + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::FsCached; + Ok(Some(info)) + } + Ok(FutureResult::ErroredFsCache(info)) => { + // since saving to the fs cache failed, keep the package information in memory + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Ok(Some(info.clone()))); + Ok(Some(info)) + } + Ok(FutureResult::PackageNotExists) => { + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Ok(None)); + Ok(None) + } + Err(err) => { + let return_err = anyhow!("{}", err); + *self.memory_cache.lock().get_mut(name).unwrap() = + MemoryCacheItem::MemoryCached(Err(err)); + Err(return_err) + } + } + } else { + match future.await { + Ok(FutureResult::SavedFsCache(info)) => Ok(Some(info)), + Ok(FutureResult::ErroredFsCache(info)) => Ok(Some(info)), + Ok(FutureResult::PackageNotExists) => Ok(None), + Err(err) => Err(anyhow!("{}", err)), + } + } + } + } + } + + async fn load_file_cached_package_info( + &self, + name: &str, + ) -> Result<NpmPackageInfo, AnyError> { + // this scenario failing should be exceptionally rare so let's + // deal with improving it only when anyone runs into an issue + let maybe_package_info = deno_core::unsync::spawn_blocking({ + let cache = self.cache.clone(); + let name = name.to_string(); + move || cache.load_package_info(&name) + }) + .await + .unwrap() + .with_context(|| { + format!( + "Previously saved '{}' from the npm cache, but now it fails to load.", + name + ) + })?; + match maybe_package_info { + Some(package_info) => Ok(package_info), + None => { + bail!("The package '{}' previously saved its registry information to the file system cache, but that file no longer exists.", name) + } + } + } + + fn create_load_future( + &self, + name: &str, + registry_url: &Url, + registry_config: &RegistryConfig, + current_runtime_http_client: &Arc<HttpClient>, + ) -> Shared<PendingRegistryLoadFuture> { + let package_url = self.get_package_url(name, registry_url); + let maybe_auth_header = maybe_auth_header_for_npm_registry(registry_config); + let guard = self.progress_bar.update(package_url.as_str()); + let cache = self.cache.clone(); + let http_client = current_runtime_http_client.clone(); + let name = name.to_string(); + // force this future to be polled on the current runtime because it's not + // safe to share `HttpClient`s across runtimes and because a restart of + // npm resolution might cause this package not to be resolved again + // causing the future to never be polled + deno_core::unsync::spawn(async move { + let maybe_bytes = http_client + .download_with_progress(package_url, maybe_auth_header, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + let future_result = deno_core::unsync::spawn_blocking( + move || -> Result<FutureResult, AnyError> { + let package_info = serde_json::from_slice(&bytes)?; + match cache.save_package_info(&name, &package_info) { + Ok(()) => { + Ok(FutureResult::SavedFsCache(Arc::new(package_info))) + } + Err(err) => { + log::debug!( + "Error saving package {} to cache: {:#}", + name, + err + ); + Ok(FutureResult::ErroredFsCache(Arc::new(package_info))) + } + } + }, + ) + .await??; + Ok(future_result) + } + None => Ok(FutureResult::PackageNotExists), + } + }) + .map(|result| result.unwrap().map_err(Arc::new)) + .boxed() + .shared() + } + + fn get_package_url(&self, name: &str, registry_url: &Url) -> Url { + // list of all characters used in npm packages: + // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ + const ASCII_SET: percent_encoding::AsciiSet = + percent_encoding::NON_ALPHANUMERIC + .remove(b'!') + .remove(b'\'') + .remove(b'(') + .remove(b')') + .remove(b'*') + .remove(b'-') + .remove(b'.') + .remove(b'/') + .remove(b'@') + .remove(b'_') + .remove(b'~'); + let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); + registry_url.join(&name.to_string()).unwrap() + } +} |