diff options
Diffstat (limited to 'cli/npm/managed/registry.rs')
-rw-r--r-- | cli/npm/managed/registry.rs | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/cli/npm/managed/registry.rs b/cli/npm/managed/registry.rs new file mode 100644 index 000000000..2466f4713 --- /dev/null +++ b/cli/npm/managed/registry.rs @@ -0,0 +1,358 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashMap; +use std::collections::HashSet; +use std::fs; +use std::io::ErrorKind; +use std::path::PathBuf; +use std::sync::Arc; + +use async_trait::async_trait; +use deno_core::anyhow::anyhow; +use deno_core::anyhow::Context; +use deno_core::error::custom_error; +use deno_core::error::AnyError; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; +use deno_core::parking_lot::Mutex; +use deno_core::serde_json; +use deno_core::url::Url; +use deno_npm::registry::NpmPackageInfo; +use deno_npm::registry::NpmRegistryApi; +use deno_npm::registry::NpmRegistryPackageInfoLoadError; + +use crate::args::CacheSetting; +use crate::cache::CACHE_PERM; +use crate::http_util::HttpClient; +use crate::util::fs::atomic_write_file; +use crate::util::progress_bar::ProgressBar; +use crate::util::sync::AtomicFlag; + +use super::cache::NpmCache; + +#[derive(Debug)] +pub struct CliNpmRegistryApi(Option<Arc<CliNpmRegistryApiInner>>); + +impl CliNpmRegistryApi { + pub fn new( + base_url: Url, + cache: Arc<NpmCache>, + http_client: Arc<HttpClient>, + progress_bar: ProgressBar, + ) -> Self { + Self(Some(Arc::new(CliNpmRegistryApiInner { + base_url, + cache, + force_reload_flag: Default::default(), + mem_cache: Default::default(), + previously_reloaded_packages: Default::default(), + http_client, + progress_bar, + }))) + } + + /// Clears the internal memory cache. + pub fn clear_memory_cache(&self) { + self.inner().clear_memory_cache(); + } + + pub fn get_cached_package_info( + &self, + name: &str, + ) -> Option<Arc<NpmPackageInfo>> { + self.inner().get_cached_package_info(name) + } + + pub fn base_url(&self) -> &Url { + &self.inner().base_url + } + + fn inner(&self) -> &Arc<CliNpmRegistryApiInner> { + // this panicking indicates a bug in the code where this + // wasn't initialized + self.0.as_ref().unwrap() + } +} + +#[async_trait] +impl NpmRegistryApi for CliNpmRegistryApi { + async fn package_info( + &self, + name: &str, + ) -> Result<Arc<NpmPackageInfo>, NpmRegistryPackageInfoLoadError> { + match self.inner().maybe_package_info(name).await { + Ok(Some(info)) => Ok(info), + Ok(None) => Err(NpmRegistryPackageInfoLoadError::PackageNotExists { + package_name: name.to_string(), + }), + Err(err) => { + Err(NpmRegistryPackageInfoLoadError::LoadError(Arc::new(err))) + } + } + } + + fn mark_force_reload(&self) -> bool { + // never force reload the registry information if reloading + // is disabled or if we're already reloading + if matches!( + self.inner().cache.cache_setting(), + CacheSetting::Only | CacheSetting::ReloadAll + ) { + return false; + } + if self.inner().force_reload_flag.raise() { + self.clear_memory_cache(); // clear the cache to force reloading + true + } else { + false + } + } +} + +type CacheItemPendingResult = + Result<Option<Arc<NpmPackageInfo>>, Arc<AnyError>>; + +#[derive(Debug)] +enum CacheItem { + Pending(Shared<BoxFuture<'static, CacheItemPendingResult>>), + Resolved(Option<Arc<NpmPackageInfo>>), +} + +#[derive(Debug)] +struct CliNpmRegistryApiInner { + base_url: Url, + cache: Arc<NpmCache>, + force_reload_flag: AtomicFlag, + mem_cache: Mutex<HashMap<String, CacheItem>>, + previously_reloaded_packages: Mutex<HashSet<String>>, + http_client: Arc<HttpClient>, + progress_bar: ProgressBar, +} + +impl CliNpmRegistryApiInner { + pub async fn maybe_package_info( + self: &Arc<Self>, + name: &str, + ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { + let (created, future) = { + let mut mem_cache = self.mem_cache.lock(); + match mem_cache.get(name) { + Some(CacheItem::Resolved(maybe_info)) => { + return Ok(maybe_info.clone()); + } + Some(CacheItem::Pending(future)) => (false, future.clone()), + None => { + if (self.cache.cache_setting().should_use_for_npm_package(name) && !self.force_reload()) + // if this has been previously reloaded, then try loading from the + // file system cache + || !self.previously_reloaded_packages.lock().insert(name.to_string()) + { + // attempt to load from the file cache + if let Some(info) = self.load_file_cached_package_info(name) { + let result = Some(Arc::new(info)); + mem_cache + .insert(name.to_string(), CacheItem::Resolved(result.clone())); + return Ok(result); + } + } + + let future = { + let api = self.clone(); + let name = name.to_string(); + async move { + api + .load_package_info_from_registry(&name) + .await + .map(|info| info.map(Arc::new)) + .map_err(Arc::new) + } + .boxed() + .shared() + }; + mem_cache + .insert(name.to_string(), CacheItem::Pending(future.clone())); + (true, future) + } + } + }; + + if created { + match future.await { + Ok(maybe_info) => { + // replace the cache item to say it's resolved now + self + .mem_cache + .lock() + .insert(name.to_string(), CacheItem::Resolved(maybe_info.clone())); + Ok(maybe_info) + } + Err(err) => { + // purge the item from the cache so it loads next time + self.mem_cache.lock().remove(name); + Err(anyhow!("{:#}", err)) + } + } + } else { + Ok(future.await.map_err(|err| anyhow!("{:#}", err))?) + } + } + + fn force_reload(&self) -> bool { + self.force_reload_flag.is_raised() + } + + fn load_file_cached_package_info( + &self, + name: &str, + ) -> Option<NpmPackageInfo> { + match self.load_file_cached_package_info_result(name) { + Ok(value) => value, + Err(err) => { + if cfg!(debug_assertions) { + panic!("error loading cached npm package info for {name}: {err:#}"); + } else { + None + } + } + } + } + + fn load_file_cached_package_info_result( + &self, + name: &str, + ) -> Result<Option<NpmPackageInfo>, AnyError> { + let file_cache_path = self.get_package_file_cache_path(name); + let file_text = match fs::read_to_string(file_cache_path) { + Ok(file_text) => file_text, + Err(err) if err.kind() == ErrorKind::NotFound => return Ok(None), + Err(err) => return Err(err.into()), + }; + match serde_json::from_str(&file_text) { + Ok(package_info) => Ok(Some(package_info)), + Err(err) => { + // This scenario might mean we need to load more data from the + // npm registry than before. So, just debug log while in debug + // rather than panic. + log::debug!( + "error deserializing registry.json for '{}'. Reloading. {:?}", + name, + err + ); + Ok(None) + } + } + } + + fn save_package_info_to_file_cache( + &self, + name: &str, + package_info: &NpmPackageInfo, + ) { + if let Err(err) = + self.save_package_info_to_file_cache_result(name, package_info) + { + if cfg!(debug_assertions) { + panic!("error saving cached npm package info for {name}: {err:#}"); + } + } + } + + fn save_package_info_to_file_cache_result( + &self, + name: &str, + package_info: &NpmPackageInfo, + ) -> Result<(), AnyError> { + let file_cache_path = self.get_package_file_cache_path(name); + let file_text = serde_json::to_string(&package_info)?; + atomic_write_file(&file_cache_path, file_text, CACHE_PERM)?; + Ok(()) + } + + async fn load_package_info_from_registry( + &self, + name: &str, + ) -> Result<Option<NpmPackageInfo>, AnyError> { + self + .load_package_info_from_registry_inner(name) + .await + .with_context(|| { + format!( + "Error getting response at {} for package \"{}\"", + self.get_package_url(name), + name + ) + }) + } + + async fn load_package_info_from_registry_inner( + &self, + name: &str, + ) -> Result<Option<NpmPackageInfo>, AnyError> { + if *self.cache.cache_setting() == CacheSetting::Only { + return Err(custom_error( + "NotCached", + format!( + "An npm specifier not found in cache: \"{name}\", --cached-only is specified." + ) + )); + } + + let package_url = self.get_package_url(name); + let guard = self.progress_bar.update(package_url.as_str()); + + let maybe_bytes = self + .http_client + .download_with_progress(package_url, &guard) + .await?; + match maybe_bytes { + Some(bytes) => { + let package_info = serde_json::from_slice(&bytes)?; + self.save_package_info_to_file_cache(name, &package_info); + Ok(Some(package_info)) + } + None => Ok(None), + } + } + + fn get_package_url(&self, name: &str) -> Url { + // list of all characters used in npm packages: + // !, ', (, ), *, -, ., /, [0-9], @, [A-Za-z], _, ~ + const ASCII_SET: percent_encoding::AsciiSet = + percent_encoding::NON_ALPHANUMERIC + .remove(b'!') + .remove(b'\'') + .remove(b'(') + .remove(b')') + .remove(b'*') + .remove(b'-') + .remove(b'.') + .remove(b'/') + .remove(b'@') + .remove(b'_') + .remove(b'~'); + let name = percent_encoding::utf8_percent_encode(name, &ASCII_SET); + self.base_url.join(&name.to_string()).unwrap() + } + + fn get_package_file_cache_path(&self, name: &str) -> PathBuf { + let name_folder_path = self.cache.package_name_folder(name, &self.base_url); + name_folder_path.join("registry.json") + } + + fn clear_memory_cache(&self) { + self.mem_cache.lock().clear(); + } + + pub fn get_cached_package_info( + &self, + name: &str, + ) -> Option<Arc<NpmPackageInfo>> { + let mem_cache = self.mem_cache.lock(); + if let Some(CacheItem::Resolved(maybe_info)) = mem_cache.get(name) { + maybe_info.clone() + } else { + None + } + } +} |