diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2023-04-06 18:46:44 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-06 18:46:44 -0400 |
commit | d07aa4a0723b04583b7cb1e09152457d866d13d3 (patch) | |
tree | f329a30becca95583fb71b4158c939c68228ce06 /cli/npm/registry.rs | |
parent | 1586c52b5b5ad511ec0bf896e94de8585f743cf8 (diff) |
refactor(npm): use deno_npm and deno_semver (#18602)
Diffstat (limited to 'cli/npm/registry.rs')
-rw-r--r-- | cli/npm/registry.rs | 613 |
1 files changed, 121 insertions, 492 deletions
diff --git a/cli/npm/registry.rs b/cli/npm/registry.rs index 75760c171..0dcdb720a 100644 --- a/cli/npm/registry.rs +++ b/cli/npm/registry.rs @@ -1,7 +1,5 @@ // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. -use std::borrow::Cow; -use std::cmp::Ordering; use std::collections::HashMap; use std::collections::HashSet; use std::fs; @@ -10,21 +8,21 @@ use std::path::PathBuf; use std::sync::Arc; use async_trait::async_trait; -use deno_core::anyhow::bail; +use deno_core::anyhow::anyhow; use deno_core::anyhow::Context; use deno_core::error::custom_error; use deno_core::error::AnyError; -use deno_core::futures; +use deno_core::futures::future::BoxFuture; +use deno_core::futures::future::Shared; +use deno_core::futures::FutureExt; use deno_core::parking_lot::Mutex; -use deno_core::serde::Deserialize; use deno_core::serde_json; use deno_core::url::Url; -use deno_graph::npm::NpmPackageNv; -use deno_graph::semver::VersionReq; +use deno_core::TaskQueue; +use deno_npm::registry::NpmPackageInfo; +use deno_npm::registry::NpmRegistryApi; use once_cell::sync::Lazy; -use serde::Serialize; -use crate::args::package_json::parse_dep_entry_name_and_raw_version; use crate::args::CacheSetting; use crate::cache::CACHE_PERM; use crate::http_util::HttpClient; @@ -34,162 +32,6 @@ use crate::util::progress_bar::ProgressBar; use super::cache::should_sync_download; use super::cache::NpmCache; -// npm registry docs: https://github.com/npm/registry/blob/master/docs/REGISTRY-API.md - -#[derive(Debug, Default, Deserialize, Serialize, Clone)] -pub struct NpmPackageInfo { - pub name: String, - pub versions: HashMap<String, NpmPackageVersionInfo>, - #[serde(rename = "dist-tags")] - pub dist_tags: HashMap<String, String>, -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub enum NpmDependencyEntryKind { - Dep, - Peer, - OptionalPeer, -} - -impl NpmDependencyEntryKind { - pub fn is_optional(&self) -> bool { - matches!(self, NpmDependencyEntryKind::OptionalPeer) - } -} - -#[derive(Debug, Clone, Eq, PartialEq)] -pub struct NpmDependencyEntry { - pub kind: NpmDependencyEntryKind, - pub bare_specifier: String, - pub name: String, - pub version_req: VersionReq, - /// When the dependency is also marked as a peer dependency, - /// use this entry to resolve the dependency when it can't - /// be resolved as a peer dependency. - pub peer_dep_version_req: Option<VersionReq>, -} - -impl PartialOrd for NpmDependencyEntry { - fn partial_cmp(&self, other: &Self) -> Option<Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for NpmDependencyEntry { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - // sort the dependencies alphabetically by name then by version descending - match self.name.cmp(&other.name) { - // sort by newest to oldest - Ordering::Equal => other - .version_req - .version_text() - .cmp(self.version_req.version_text()), - ordering => ordering, - } - } -} - -#[derive(Debug, Default, Deserialize, Serialize, Clone, PartialEq, Eq)] -pub struct NpmPeerDependencyMeta { - #[serde(default)] - optional: bool, -} - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)] -#[serde(untagged)] -pub enum NpmPackageVersionBinEntry { - String(String), - Map(HashMap<String, String>), -} - -#[derive(Debug, Default, Deserialize, Serialize, Clone, PartialEq, Eq)] -#[serde(rename_all = "camelCase")] -pub struct NpmPackageVersionInfo { - pub version: String, - pub dist: NpmPackageVersionDistInfo, - pub bin: Option<NpmPackageVersionBinEntry>, - // Bare specifier to version (ex. `"typescript": "^3.0.1") or possibly - // package and version (ex. `"typescript-3.0.1": "npm:typescript@3.0.1"`). - #[serde(default)] - pub dependencies: HashMap<String, String>, - #[serde(default)] - pub peer_dependencies: HashMap<String, String>, - #[serde(default)] - pub peer_dependencies_meta: HashMap<String, NpmPeerDependencyMeta>, -} - -impl NpmPackageVersionInfo { - pub fn dependencies_as_entries( - &self, - ) -> Result<Vec<NpmDependencyEntry>, AnyError> { - fn parse_dep_entry( - (key, value): (&String, &String), - kind: NpmDependencyEntryKind, - ) -> Result<NpmDependencyEntry, AnyError> { - let (name, version_req) = - parse_dep_entry_name_and_raw_version(key, value)?; - let version_req = - VersionReq::parse_from_npm(version_req).with_context(|| { - format!("error parsing version requirement for dependency: {key}@{version_req}") - })?; - Ok(NpmDependencyEntry { - kind, - bare_specifier: key.to_string(), - name: name.to_string(), - version_req, - peer_dep_version_req: None, - }) - } - - let mut result = HashMap::with_capacity( - self.dependencies.len() + self.peer_dependencies.len(), - ); - for entry in &self.peer_dependencies { - let is_optional = self - .peer_dependencies_meta - .get(entry.0) - .map(|d| d.optional) - .unwrap_or(false); - let kind = match is_optional { - true => NpmDependencyEntryKind::OptionalPeer, - false => NpmDependencyEntryKind::Peer, - }; - let entry = parse_dep_entry(entry, kind)?; - result.insert(entry.bare_specifier.clone(), entry); - } - for entry in &self.dependencies { - let entry = parse_dep_entry(entry, NpmDependencyEntryKind::Dep)?; - // people may define a dependency as a peer dependency as well, - // so in those cases, attempt to resolve as a peer dependency, - // but then use this dependency version requirement otherwise - if let Some(peer_dep_entry) = result.get_mut(&entry.bare_specifier) { - peer_dep_entry.peer_dep_version_req = Some(entry.version_req); - } else { - result.insert(entry.bare_specifier.clone(), entry); - } - } - Ok(result.into_values().collect()) - } -} - -#[derive(Debug, Default, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub struct NpmPackageVersionDistInfo { - /// URL to the tarball. - pub tarball: String, - shasum: String, - integrity: Option<String>, -} - -impl NpmPackageVersionDistInfo { - pub fn integrity(&self) -> Cow<String> { - self - .integrity - .as_ref() - .map(Cow::Borrowed) - .unwrap_or_else(|| Cow::Owned(format!("sha1-{}", self.shasum))) - } -} - static NPM_REGISTRY_DEFAULT_URL: Lazy<Url> = Lazy::new(|| { let env_var_name = "NPM_CONFIG_REGISTRY"; if let Ok(registry_url) = std::env::var(env_var_name) { @@ -209,9 +51,9 @@ static NPM_REGISTRY_DEFAULT_URL: Lazy<Url> = Lazy::new(|| { }); #[derive(Clone, Debug)] -pub struct NpmRegistryApi(Arc<dyn NpmRegistryApiInner>); +pub struct NpmRegistry(Option<Arc<NpmRegistryApiInner>>); -impl NpmRegistryApi { +impl NpmRegistry { pub fn default_url() -> &'static Url { &NPM_REGISTRY_DEFAULT_URL } @@ -222,188 +64,142 @@ impl NpmRegistryApi { http_client: HttpClient, progress_bar: ProgressBar, ) -> Self { - Self(Arc::new(RealNpmRegistryApiInner { + Self(Some(Arc::new(NpmRegistryApiInner { base_url, cache, mem_cache: Default::default(), previously_reloaded_packages: Default::default(), http_client, progress_bar, - })) + }))) } - /// Creates an npm registry API that will be uninitialized - /// and error for every request. This is useful for tests - /// or for initializing the LSP. + /// Creates an npm registry API that will be uninitialized. This is + /// useful for tests or for initializing the LSP. pub fn new_uninitialized() -> Self { - Self(Arc::new(NullNpmRegistryApiInner)) - } - - #[cfg(test)] - pub fn new_for_test(api: TestNpmRegistryApiInner) -> NpmRegistryApi { - Self(Arc::new(api)) - } - - pub async fn package_info( - &self, - name: &str, - ) -> Result<Arc<NpmPackageInfo>, AnyError> { - let maybe_package_info = self.0.maybe_package_info(name).await?; - match maybe_package_info { - Some(package_info) => Ok(package_info), - None => bail!("npm package '{}' does not exist", name), - } - } - - pub async fn package_version_info( - &self, - nv: &NpmPackageNv, - ) -> Result<Option<NpmPackageVersionInfo>, AnyError> { - let package_info = self.package_info(&nv.name).await?; - Ok(package_info.versions.get(&nv.version.to_string()).cloned()) - } - - /// Caches all the package information in memory in parallel. - pub async fn cache_in_parallel( - &self, - package_names: Vec<String>, - ) -> Result<(), AnyError> { - let mut unresolved_tasks = Vec::with_capacity(package_names.len()); - - // cache the package info up front in parallel - if should_sync_download() { - // for deterministic test output - let mut ordered_names = package_names; - ordered_names.sort(); - for name in ordered_names { - self.package_info(&name).await?; - } - } else { - for name in package_names { - let api = self.clone(); - unresolved_tasks.push(tokio::task::spawn(async move { - // This is ok to call because api will internally cache - // the package information in memory. - api.package_info(&name).await - })); - } - }; - - for result in futures::future::join_all(unresolved_tasks).await { - result??; // surface the first error - } - - Ok(()) + Self(None) } /// Clears the internal memory cache. pub fn clear_memory_cache(&self) { - self.0.clear_memory_cache(); + self.inner().clear_memory_cache(); } pub fn get_cached_package_info( &self, name: &str, ) -> Option<Arc<NpmPackageInfo>> { - self.0.get_cached_package_info(name) + self.inner().get_cached_package_info(name) } pub fn base_url(&self) -> &Url { - self.0.base_url() + &self.inner().base_url } -} - -#[async_trait] -trait NpmRegistryApiInner: std::fmt::Debug + Sync + Send + 'static { - async fn maybe_package_info( - &self, - name: &str, - ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError>; - fn clear_memory_cache(&self); - - fn get_cached_package_info(&self, name: &str) -> Option<Arc<NpmPackageInfo>>; - - fn base_url(&self) -> &Url; + fn inner(&self) -> &Arc<NpmRegistryApiInner> { + // this panicking indicates a bug in the code where this + // wasn't initialized + self.0.as_ref().unwrap() + } } -#[async_trait] -impl NpmRegistryApiInner for RealNpmRegistryApiInner { - fn base_url(&self) -> &Url { - &self.base_url - } +static SYNC_DOWNLOAD_TASK_QUEUE: Lazy<TaskQueue> = + Lazy::new(TaskQueue::default); +#[async_trait] +impl NpmRegistryApi for NpmRegistry { async fn maybe_package_info( &self, name: &str, ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { - self.maybe_package_info(name).await - } - - fn clear_memory_cache(&self) { - self.mem_cache.lock().clear(); + if should_sync_download() { + let inner = self.inner().clone(); + SYNC_DOWNLOAD_TASK_QUEUE + .queue(async move { inner.maybe_package_info(name).await }) + .await + } else { + self.inner().maybe_package_info(name).await + } } +} - fn get_cached_package_info(&self, name: &str) -> Option<Arc<NpmPackageInfo>> { - self.mem_cache.lock().get(name).cloned().flatten() - } +#[derive(Debug)] +enum CacheItem { + Pending( + Shared<BoxFuture<'static, Result<Option<Arc<NpmPackageInfo>>, String>>>, + ), + Resolved(Option<Arc<NpmPackageInfo>>), } #[derive(Debug)] -struct RealNpmRegistryApiInner { +struct NpmRegistryApiInner { base_url: Url, cache: NpmCache, - mem_cache: Mutex<HashMap<String, Option<Arc<NpmPackageInfo>>>>, + mem_cache: Mutex<HashMap<String, CacheItem>>, previously_reloaded_packages: Mutex<HashSet<String>>, http_client: HttpClient, progress_bar: ProgressBar, } -impl RealNpmRegistryApiInner { +impl NpmRegistryApiInner { pub async fn maybe_package_info( - &self, + self: &Arc<Self>, name: &str, ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { - let maybe_maybe_info = self.mem_cache.lock().get(name).cloned(); - if let Some(maybe_info) = maybe_maybe_info { - Ok(maybe_info) - } else { - let mut maybe_package_info = None; - if self.cache.cache_setting().should_use_for_npm_package(name) + let (created, future) = { + let mut mem_cache = self.mem_cache.lock(); + match mem_cache.get(name) { + Some(CacheItem::Resolved(maybe_info)) => { + return Ok(maybe_info.clone()); + } + Some(CacheItem::Pending(future)) => (false, future.clone()), + None => { + if self.cache.cache_setting().should_use_for_npm_package(name) // if this has been previously reloaded, then try loading from the // file system cache || !self.previously_reloaded_packages.lock().insert(name.to_string()) - { - // attempt to load from the file cache - maybe_package_info = self.load_file_cached_package_info(name); - } - - if maybe_package_info.is_none() { - maybe_package_info = self - .load_package_info_from_registry(name) - .await - .with_context(|| { - format!( - "Error getting response at {} for package \"{}\"", - self.get_package_url(name), - name - ) - })?; + { + // attempt to load from the file cache + if let Some(info) = self.load_file_cached_package_info(name) { + let result = Some(Arc::new(info)); + mem_cache + .insert(name.to_string(), CacheItem::Resolved(result.clone())); + return Ok(result); + } + } + + let future = { + let api = self.clone(); + let name = name.to_string(); + async move { api.load_package_info_from_registry(&name).await } + .boxed() + .shared() + }; + mem_cache + .insert(name.to_string(), CacheItem::Pending(future.clone())); + (true, future) + } } - let maybe_package_info = maybe_package_info.map(Arc::new); + }; - // Not worth the complexity to ensure multiple in-flight requests - // for the same package only request once because with how this is - // used that should never happen. - let mut mem_cache = self.mem_cache.lock(); - Ok(match mem_cache.get(name) { - // another thread raced here, so use its result instead - Some(info) => info.clone(), - None => { - mem_cache.insert(name.to_string(), maybe_package_info.clone()); - maybe_package_info + if created { + match future.await { + Ok(maybe_info) => { + // replace the cache item to say it's resolved now + self + .mem_cache + .lock() + .insert(name.to_string(), CacheItem::Resolved(maybe_info.clone())); + Ok(maybe_info) } - }) + Err(err) => { + // purge the item from the cache so it loads next time + self.mem_cache.lock().remove(name); + Err(anyhow!("{}", err)) + } + } + } else { + Ok(future.await.map_err(|err| anyhow!("{}", err))?) } } @@ -478,6 +274,25 @@ impl RealNpmRegistryApiInner { async fn load_package_info_from_registry( &self, name: &str, + ) -> Result<Option<Arc<NpmPackageInfo>>, String> { + self + .load_package_info_from_registry_inner(name) + .await + .with_context(|| { + format!( + "Error getting response at {} for package \"{}\"", + self.get_package_url(name), + name + ) + }) + .map(|info| info.map(Arc::new)) + // make cloneable + .map_err(|err| format!("{err:#}")) + } + + async fn load_package_info_from_registry_inner( + &self, + name: &str, ) -> Result<Option<NpmPackageInfo>, AnyError> { if *self.cache.cache_setting() == CacheSetting::Only { return Err(custom_error( @@ -513,206 +328,20 @@ impl RealNpmRegistryApiInner { let name_folder_path = self.cache.package_name_folder(name, &self.base_url); name_folder_path.join("registry.json") } -} - -#[derive(Debug)] -struct NullNpmRegistryApiInner; - -#[async_trait] -impl NpmRegistryApiInner for NullNpmRegistryApiInner { - async fn maybe_package_info( - &self, - _name: &str, - ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { - Err(deno_core::anyhow::anyhow!( - "Deno bug. Please report. Registry API was not initialized." - )) - } - - fn clear_memory_cache(&self) {} - - fn get_cached_package_info( - &self, - _name: &str, - ) -> Option<Arc<NpmPackageInfo>> { - None - } - - fn base_url(&self) -> &Url { - NpmRegistryApi::default_url() - } -} - -/// Note: This test struct is not thread safe for setup -/// purposes. Construct everything on the same thread. -#[cfg(test)] -#[derive(Clone, Default, Debug)] -pub struct TestNpmRegistryApiInner { - package_infos: Arc<Mutex<HashMap<String, NpmPackageInfo>>>, -} - -#[cfg(test)] -impl TestNpmRegistryApiInner { - pub fn add_package_info(&self, name: &str, info: NpmPackageInfo) { - let previous = self.package_infos.lock().insert(name.to_string(), info); - assert!(previous.is_none()); - } - - pub fn ensure_package(&self, name: &str) { - if !self.package_infos.lock().contains_key(name) { - self.add_package_info( - name, - NpmPackageInfo { - name: name.to_string(), - ..Default::default() - }, - ); - } - } - pub fn ensure_package_version(&self, name: &str, version: &str) { - self.ensure_package(name); - let mut infos = self.package_infos.lock(); - let info = infos.get_mut(name).unwrap(); - if !info.versions.contains_key(version) { - info.versions.insert( - version.to_string(), - NpmPackageVersionInfo { - version: version.to_string(), - ..Default::default() - }, - ); - } - } - - pub fn add_dependency( - &self, - package_from: (&str, &str), - package_to: (&str, &str), - ) { - let mut infos = self.package_infos.lock(); - let info = infos.get_mut(package_from.0).unwrap(); - let version = info.versions.get_mut(package_from.1).unwrap(); - version - .dependencies - .insert(package_to.0.to_string(), package_to.1.to_string()); - } - - pub fn add_dist_tag(&self, package_name: &str, tag: &str, version: &str) { - let mut infos = self.package_infos.lock(); - let info = infos.get_mut(package_name).unwrap(); - info.dist_tags.insert(tag.to_string(), version.to_string()); - } - - pub fn add_peer_dependency( - &self, - package_from: (&str, &str), - package_to: (&str, &str), - ) { - let mut infos = self.package_infos.lock(); - let info = infos.get_mut(package_from.0).unwrap(); - let version = info.versions.get_mut(package_from.1).unwrap(); - version - .peer_dependencies - .insert(package_to.0.to_string(), package_to.1.to_string()); - } - - pub fn add_optional_peer_dependency( - &self, - package_from: (&str, &str), - package_to: (&str, &str), - ) { - let mut infos = self.package_infos.lock(); - let info = infos.get_mut(package_from.0).unwrap(); - let version = info.versions.get_mut(package_from.1).unwrap(); - version - .peer_dependencies - .insert(package_to.0.to_string(), package_to.1.to_string()); - version.peer_dependencies_meta.insert( - package_to.0.to_string(), - NpmPeerDependencyMeta { optional: true }, - ); + pub fn clear_memory_cache(&self) { + self.mem_cache.lock().clear(); } -} -#[cfg(test)] -#[async_trait] -impl NpmRegistryApiInner for TestNpmRegistryApiInner { - async fn maybe_package_info( + pub fn get_cached_package_info( &self, name: &str, - ) -> Result<Option<Arc<NpmPackageInfo>>, AnyError> { - let result = self.package_infos.lock().get(name).cloned(); - Ok(result.map(Arc::new)) - } - - fn clear_memory_cache(&self) { - // do nothing for the test api - } - - fn get_cached_package_info( - &self, - _name: &str, ) -> Option<Arc<NpmPackageInfo>> { - None - } - - fn base_url(&self) -> &Url { - NpmRegistryApi::default_url() - } -} - -#[cfg(test)] -mod test { - use std::collections::HashMap; - - use deno_core::serde_json; - - use crate::npm::registry::NpmPackageVersionBinEntry; - use crate::npm::NpmPackageVersionDistInfo; - - use super::NpmPackageVersionInfo; - - #[test] - fn deserializes_minimal_pkg_info() { - let text = r#"{ "version": "1.0.0", "dist": { "tarball": "value", "shasum": "test" } }"#; - let info: NpmPackageVersionInfo = serde_json::from_str(text).unwrap(); - assert_eq!( - info, - NpmPackageVersionInfo { - version: "1.0.0".to_string(), - dist: NpmPackageVersionDistInfo { - tarball: "value".to_string(), - shasum: "test".to_string(), - integrity: None, - }, - bin: None, - dependencies: Default::default(), - peer_dependencies: Default::default(), - peer_dependencies_meta: Default::default() - } - ); - } - - #[test] - fn deserializes_bin_entry() { - // string - let text = r#"{ "version": "1.0.0", "bin": "bin-value", "dist": { "tarball": "value", "shasum": "test" } }"#; - let info: NpmPackageVersionInfo = serde_json::from_str(text).unwrap(); - assert_eq!( - info.bin, - Some(NpmPackageVersionBinEntry::String("bin-value".to_string())) - ); - - // map - let text = r#"{ "version": "1.0.0", "bin": { "a": "a-value", "b": "b-value" }, "dist": { "tarball": "value", "shasum": "test" } }"#; - let info: NpmPackageVersionInfo = serde_json::from_str(text).unwrap(); - assert_eq!( - info.bin, - Some(NpmPackageVersionBinEntry::Map(HashMap::from([ - ("a".to_string(), "a-value".to_string()), - ("b".to_string(), "b-value".to_string()), - ]))) - ); + let mem_cache = self.mem_cache.lock(); + if let Some(CacheItem::Resolved(maybe_info)) = mem_cache.get(name) { + maybe_info.clone() + } else { + None + } } } |