Diffstat (limited to 'cli/npm/managed')
-rw-r--r--  cli/npm/managed/cache.rs    51
-rw-r--r--  cli/npm/managed/tarball.rs  93
2 files changed, 125 insertions, 19 deletions
diff --git a/cli/npm/managed/cache.rs b/cli/npm/managed/cache.rs
index 9ba5c1c99..44b98fcee 100644
--- a/cli/npm/managed/cache.rs
+++ b/cli/npm/managed/cache.rs
@@ -25,6 +25,7 @@ use crate::util::fs::hard_link_dir_recursive;
use crate::util::progress_bar::ProgressBar;
use super::tarball::verify_and_extract_tarball;
+use super::tarball::TarballExtractionMode;
/// Stores a single copy of npm packages in a cache.
#[derive(Debug)]
@@ -69,7 +70,7 @@ impl NpmCache {
/// to ensure a package is only downloaded once per run of the CLI. This
/// prevents downloads from re-occurring when someone has `--reload` and
/// imports a dynamic import that imports the same package again, for example.
- fn should_use_global_cache_for_package(&self, package: &PackageNv) -> bool {
+ fn should_use_cache_for_package(&self, package: &PackageNv) -> bool {
self.cache_setting.should_use_for_npm_package(&package.name)
|| !self
.previously_reloaded_packages
@@ -91,26 +92,23 @@ impl NpmCache {
async fn ensure_package_inner(
&self,
- package: &PackageNv,
+ package_nv: &PackageNv,
dist: &NpmPackageVersionDistInfo,
registry_url: &Url,
) -> Result<(), AnyError> {
let package_folder = self
.cache_dir
- .package_folder_for_name_and_version(package, registry_url);
- if self.should_use_global_cache_for_package(package)
- && self.fs.exists_sync(&package_folder)
- // if this file exists, then the package didn't successfully extract
- // the first time, or another process is currently extracting the zip file
- && !self.fs.exists_sync(&package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME))
- {
+ .package_folder_for_name_and_version(package_nv, registry_url);
+ let should_use_cache = self.should_use_cache_for_package(package_nv);
+ let package_folder_exists = self.fs.exists_sync(&package_folder);
+ if should_use_cache && package_folder_exists {
return Ok(());
} else if self.cache_setting == CacheSetting::Only {
return Err(custom_error(
"NotCached",
format!(
"An npm specifier not found in cache: \"{}\", --cached-only is specified.",
- &package.name
+ &package_nv.name
)
)
);
@@ -127,7 +125,31 @@ impl NpmCache {
.await?;
match maybe_bytes {
Some(bytes) => {
- verify_and_extract_tarball(package, &bytes, dist, &package_folder)
+ let extraction_mode = if should_use_cache || !package_folder_exists {
+ TarballExtractionMode::SiblingTempDir
+ } else {
+ // The user ran with `--reload`, so overwrite the package instead of
+ // deleting it since the package might get corrupted if a user kills
+ // their deno process while it's deleting a package directory
+ //
+ // We can't rename this folder and delete it because the folder
+ // may be in use by another process or may now contain hardlinks,
+ // which will cause Windows to throw an "AccessDenied" error when
+ // renaming. So we settle for overwriting.
+ TarballExtractionMode::Overwrite
+ };
+ let dist = dist.clone();
+ let package_nv = package_nv.clone();
+ deno_core::unsync::spawn_blocking(move || {
+ verify_and_extract_tarball(
+ &package_nv,
+ &bytes,
+ &dist,
+ &package_folder,
+ extraction_mode,
+ )
+ })
+ .await?
}
None => {
bail!("Could not find npm package tarball at: {}", dist.tarball);
@@ -150,7 +172,7 @@ impl NpmCache {
.package_folder_for_id(folder_id, registry_url);
if package_folder.exists()
- // if this file exists, then the package didn't successfully extract
+ // if this file exists, then the package didn't successfully initialize
// the first time, or another process is currently extracting the zip file
&& !package_folder.join(NPM_PACKAGE_SYNC_LOCK_FILENAME).exists()
&& self.cache_setting.should_use_for_npm_package(&folder_id.nv.name)
@@ -161,6 +183,9 @@ impl NpmCache {
let original_package_folder = self
.cache_dir
.package_folder_for_name_and_version(&folder_id.nv, registry_url);
+
+ // it seems Windows throws an "AccessDenied" error when moving a
+ // directory with hard links, which is why this approach is used instead
with_folder_sync_lock(&folder_id.nv, &package_folder, || {
hard_link_dir_recursive(&original_package_folder, &package_folder)
})?;
@@ -206,7 +231,7 @@ impl NpmCache {
const NPM_PACKAGE_SYNC_LOCK_FILENAME: &str = ".deno_sync_lock";
-pub fn with_folder_sync_lock(
+fn with_folder_sync_lock(
package: &PackageNv,
output_folder: &Path,
action: impl FnOnce() -> Result<(), AnyError>,
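For context on the .deno_sync_lock checks above: with_folder_sync_lock (now private to this module) runs a folder-producing action behind a sentinel file, so a leftover sentinel signals that the folder was never fully populated. The sketch below shows the general shape of that pattern; it is an illustration, not the actual implementation in cache.rs.

// Assumed shape of a sentinel-file lock; not the real with_folder_sync_lock.
use std::fs;
use std::io;
use std::path::Path;

fn with_sentinel_lock(
  output_folder: &Path,
  action: impl FnOnce() -> io::Result<()>,
) -> io::Result<()> {
  fs::create_dir_all(output_folder)?;
  let sentinel = output_folder.join(".deno_sync_lock");
  // Create the sentinel before doing any work in the folder.
  fs::write(&sentinel, "")?;
  match action() {
    Ok(()) => {
      // Remove the sentinel only on success, so its presence later means
      // the extraction never completed.
      let _ = fs::remove_file(&sentinel);
      Ok(())
    }
    Err(err) => {
      // On failure, drop the partially populated folder so the next run
      // starts from scratch.
      let _ = fs::remove_dir_all(output_folder);
      Err(err)
    }
  }
}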
diff --git a/cli/npm/managed/tarball.rs b/cli/npm/managed/tarball.rs
index 1267b13d8..e2d242e66 100644
--- a/cli/npm/managed/tarball.rs
+++ b/cli/npm/managed/tarball.rs
@@ -2,12 +2,14 @@
use std::collections::HashSet;
use std::fs;
+use std::io::ErrorKind;
use std::path::Path;
use std::path::PathBuf;
use base64::prelude::BASE64_STANDARD;
use base64::Engine;
use deno_core::anyhow::bail;
+use deno_core::anyhow::Context;
use deno_core::error::AnyError;
use deno_npm::registry::NpmPackageVersionDistInfo;
use deno_npm::registry::NpmPackageVersionDistInfoIntegrity;
@@ -16,19 +18,76 @@ use flate2::read::GzDecoder;
use tar::Archive;
use tar::EntryType;
-use super::cache::with_folder_sync_lock;
+use crate::util::path::get_atomic_dir_path;
+
+#[derive(Debug, Copy, Clone)]
+pub enum TarballExtractionMode {
+ /// Overwrites the destination directory without deleting any files.
+ Overwrite,
+ /// Creates and writes to a sibling temporary directory. When done, moves
+ /// it to the final destination.
+ ///
+ /// This is more robust than `Overwrite` as it better handles multiple
+ /// processes writing to the directory at the same time.
+ SiblingTempDir,
+}
pub fn verify_and_extract_tarball(
- package: &PackageNv,
+ package_nv: &PackageNv,
data: &[u8],
dist_info: &NpmPackageVersionDistInfo,
output_folder: &Path,
+ extraction_mode: TarballExtractionMode,
) -> Result<(), AnyError> {
- verify_tarball_integrity(package, data, &dist_info.integrity())?;
+ verify_tarball_integrity(package_nv, data, &dist_info.integrity())?;
+
+ match extraction_mode {
+ TarballExtractionMode::Overwrite => extract_tarball(data, output_folder),
+ TarballExtractionMode::SiblingTempDir => {
+ let temp_dir = get_atomic_dir_path(output_folder);
+ extract_tarball(data, &temp_dir)?;
+ rename_with_retries(&temp_dir, output_folder)
+ .map_err(AnyError::from)
+ .context("Failed moving extracted tarball to final destination.")
+ }
+ }
+}
+
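get_atomic_dir_path comes from crate::util::path and is not shown in this diff. The assumption behind SiblingTempDir is that it yields a uniquely named sibling of the destination, so the final rename is a cheap same-filesystem move. A rough sketch of that idea follows; the name and suffix scheme here are illustrative, not the real helper.

// Illustrative sibling temp path; not the actual get_atomic_dir_path.
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};

static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);

fn sibling_temp_dir_path(dest: &Path) -> PathBuf {
  let count = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
  let name = dest
    .file_name()
    .map(|s| s.to_string_lossy().into_owned())
    .unwrap_or_default();
  // Keep the temp dir next to the destination so fs::rename never has to
  // copy across filesystems.
  dest.with_file_name(format!(".{}_{}_{}.tmp", name, std::process::id(), count))
}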
+fn rename_with_retries(
+ temp_dir: &Path,
+ output_folder: &Path,
+) -> Result<(), std::io::Error> {
+ fn already_exists(err: &std::io::Error, output_folder: &Path) -> bool {
+ // Windows will return an "Access is denied" error rather than `AlreadyExists`
+ err.kind() == ErrorKind::AlreadyExists || output_folder.exists()
+ }
+
+ let mut count = 0;
+ // renaming might be flaky if a lot of processes are trying
+ // to do this, so retry a few times
+ loop {
+ match fs::rename(temp_dir, output_folder) {
+ Ok(_) => return Ok(()),
+ Err(err) if already_exists(&err, output_folder) => {
+ // another process already put the package here, so just clean up the temp dir
+ let _ = fs::remove_dir_all(temp_dir);
+ return Ok(());
+ }
+ Err(err) => {
+ count += 1;
+ if count > 5 {
+ // too many retries, cleanup and return the error
+ let _ = fs::remove_dir_all(temp_dir);
+ return Err(err);
+ }
- with_folder_sync_lock(package, output_folder, || {
- extract_tarball(data, output_folder)
- })
+ // wait a bit before retrying... this should be very rare or only
+ // in error cases, so ok to sleep a bit
+ let sleep_ms = std::cmp::min(100, 20 * count);
+ std::thread::sleep(std::time::Duration::from_millis(sleep_ms));
+ }
+ }
+ }
}
fn verify_tarball_integrity(
@@ -150,6 +209,7 @@ fn extract_tarball(data: &[u8], output_folder: &Path) -> Result<(), AnyError> {
#[cfg(test)]
mod test {
use deno_semver::Version;
+ use test_util::TempDir;
use super::*;
@@ -240,4 +300,25 @@ mod test {
)
.is_ok());
}
+
+ #[test]
+ fn rename_with_retries_succeeds_exists() {
+ let temp_dir = TempDir::new();
+ let folder_1 = temp_dir.path().join("folder_1");
+ let folder_2 = temp_dir.path().join("folder_2");
+
+ folder_1.create_dir_all();
+ folder_1.join("a.txt").write("test");
+ folder_2.create_dir_all();
+ // this will not end up in the output because rename_with_retries assumes
+ // any folder that already made it to the destination has the same contents
+ folder_2.join("b.txt").write("test2");
+
+ let dest_folder = temp_dir.path().join("dest_folder");
+
+ rename_with_retries(folder_1.as_path(), dest_folder.as_path()).unwrap();
+ rename_with_retries(folder_2.as_path(), dest_folder.as_path()).unwrap();
+ assert!(dest_folder.join("a.txt").exists());
+ assert!(!dest_folder.join("b.txt").exists());
+ }
}
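One further case worth covering, sketched here as an addition rather than something from the diff: if the source directory never existed, every rename attempt fails and the destination never appears, so the error should surface once the retries are exhausted.

#[test]
fn rename_with_retries_missing_source_errors() {
  // Sketch of an extra test, not part of the diff.
  let temp_dir = TempDir::new();
  let missing = temp_dir.path().join("does_not_exist");
  let dest = temp_dir.path().join("dest");
  // The source was never created and the destination never shows up, so
  // rename_with_retries returns the final io::Error after its retries.
  assert!(rename_with_retries(missing.as_path(), dest.as_path()).is_err());
}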