summaryrefslogtreecommitdiff
path: root/cli/util/fs.rs
diff options
context:
space:
mode:
Diffstat (limited to 'cli/util/fs.rs')
-rw-r--r--cli/util/fs.rs358
1 files changed, 201 insertions, 157 deletions
diff --git a/cli/util/fs.rs b/cli/util/fs.rs
index f9fe9424f..86b17754b 100644
--- a/cli/util/fs.rs
+++ b/cli/util/fs.rs
@@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+use deno_core::anyhow::anyhow;
use deno_core::anyhow::Context;
use deno_core::error::AnyError;
pub use deno_core::normalize_path;
@@ -8,7 +9,7 @@ use deno_core::ModuleSpecifier;
use deno_runtime::deno_crypto::rand;
use deno_runtime::deno_fs::FileSystem;
use deno_runtime::deno_node::PathClean;
-use std::borrow::Cow;
+use std::collections::HashSet;
use std::env::current_dir;
use std::fmt::Write as FmtWrite;
use std::fs::OpenOptions;
@@ -21,11 +22,13 @@ use std::sync::Arc;
use std::time::Duration;
use walkdir::WalkDir;
-use crate::args::FilesConfig;
use crate::util::progress_bar::ProgressBar;
use crate::util::progress_bar::ProgressBarStyle;
use crate::util::progress_bar::ProgressMessagePrompt;
+use super::glob::FilePatterns;
+use super::glob::PathOrPattern;
+use super::glob::PathOrPatternSet;
use super::path::specifier_to_file_path;
/// Writes the file to the file system at a temporary path, then
@@ -244,18 +247,16 @@ pub fn resolve_from_cwd(path: &Path) -> Result<PathBuf, AnyError> {
/// Collects file paths that satisfy the given predicate, by recursively walking `files`.
/// If the walker visits a path that is listed in `ignore`, it skips descending into the directory.
-pub struct FileCollector<TFilter: Fn(&Path) -> bool> {
- canonicalized_ignore: Vec<PathBuf>,
+pub struct FileCollector<TFilter: Fn(&Path, &FilePatterns) -> bool> {
file_filter: TFilter,
ignore_git_folder: bool,
ignore_node_modules: bool,
ignore_vendor_folder: bool,
}
-impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
+impl<TFilter: Fn(&Path, &FilePatterns) -> bool> FileCollector<TFilter> {
pub fn new(file_filter: TFilter) -> Self {
Self {
- canonicalized_ignore: Default::default(),
file_filter,
ignore_git_folder: false,
ignore_node_modules: false,
@@ -263,14 +264,6 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
}
}
- pub fn add_ignore_paths(mut self, paths: &[PathBuf]) -> Self {
- // retain only the paths which exist and ignore the rest
- self
- .canonicalized_ignore
- .extend(paths.iter().filter_map(|i| canonicalize_path(i).ok()));
- self
- }
-
pub fn ignore_node_modules(mut self) -> Self {
self.ignore_node_modules = true;
self
@@ -286,58 +279,62 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
self
}
- pub fn collect_files(
+ pub fn collect_file_patterns(
&self,
- files: Option<&[PathBuf]>,
+ file_patterns: FilePatterns,
) -> Result<Vec<PathBuf>, AnyError> {
let mut target_files = Vec::new();
- let files = if let Some(files) = files {
- Cow::Borrowed(files)
- } else {
- Cow::Owned(vec![PathBuf::from(".")])
- };
- for file in files.iter() {
- if let Ok(file) = canonicalize_path(file) {
- // use an iterator like this in order to minimize the number of file system operations
- let mut iterator = WalkDir::new(&file).into_iter();
- loop {
- let e = match iterator.next() {
- None => break,
- Some(Err(_)) => continue,
- Some(Ok(entry)) => entry,
- };
- let file_type = e.file_type();
- let is_dir = file_type.is_dir();
- if let Ok(c) = canonicalize_path(e.path()) {
- if self.canonicalized_ignore.iter().any(|i| c.starts_with(i)) {
- if is_dir {
- iterator.skip_current_dir();
- }
- } else if is_dir {
- let should_ignore_dir = c
- .file_name()
- .map(|dir_name| {
- let dir_name = dir_name.to_string_lossy().to_lowercase();
- let is_ignored_file = match dir_name.as_str() {
- "node_modules" => self.ignore_node_modules,
- "vendor" => self.ignore_vendor_folder,
- ".git" => self.ignore_git_folder,
- _ => false,
- };
- // allow the user to opt out of ignoring by explicitly specifying the dir
- file != c && is_ignored_file
- })
- .unwrap_or(false);
- if should_ignore_dir {
- iterator.skip_current_dir();
- }
- } else if (self.file_filter)(e.path()) {
- target_files.push(c);
- }
- } else if is_dir {
- // failed canonicalizing, so skip it
+ let mut visited_paths = HashSet::new();
+ let file_patterns_by_base = file_patterns.split_by_base();
+ for (base, file_patterns) in file_patterns_by_base {
+ let file = normalize_path(base);
+ // use an iterator in order to minimize the number of file system operations
+ let mut iterator = WalkDir::new(&file)
+ .follow_links(false) // the default, but be explicit
+ .into_iter();
+ loop {
+ let e = match iterator.next() {
+ None => break,
+ Some(Err(_)) => continue,
+ Some(Ok(entry)) => entry,
+ };
+ let file_type = e.file_type();
+ let is_dir = file_type.is_dir();
+ let c = e.path().to_path_buf();
+ if file_patterns.exclude.matches_path(&c)
+ || !is_dir
+ && !file_patterns
+ .include
+ .as_ref()
+ .map(|i| i.matches_path(&c))
+ .unwrap_or(true)
+ {
+ if is_dir {
iterator.skip_current_dir();
}
+ } else if is_dir {
+ let should_ignore_dir = c
+ .file_name()
+ .map(|dir_name| {
+ let dir_name = dir_name.to_string_lossy().to_lowercase();
+ let is_ignored_file = match dir_name.as_str() {
+ "node_modules" => self.ignore_node_modules,
+ "vendor" => self.ignore_vendor_folder,
+ ".git" => self.ignore_git_folder,
+ _ => false,
+ };
+ // allow the user to opt out of ignoring by explicitly specifying the dir
+ file != c && is_ignored_file
+ })
+ .unwrap_or(false)
+ || !visited_paths.insert(c.clone());
+ if should_ignore_dir {
+ iterator.skip_current_dir();
+ }
+ } else if (self.file_filter)(&c, &file_patterns)
+ && visited_paths.insert(c.clone())
+ {
+ target_files.push(c);
}
}
}
@@ -349,54 +346,68 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
/// Specifiers that start with http and https are left intact.
/// Note: This ignores all .git and node_modules folders.
pub fn collect_specifiers(
- files: &FilesConfig,
- predicate: impl Fn(&Path) -> bool,
+ mut files: FilePatterns,
+ predicate: impl Fn(&Path, &FilePatterns) -> bool,
) -> Result<Vec<ModuleSpecifier>, AnyError> {
let mut prepared = vec![];
- let file_collector = FileCollector::new(predicate)
- .add_ignore_paths(&files.exclude)
- .ignore_git_folder()
- .ignore_node_modules()
- .ignore_vendor_folder();
-
- let root_path = current_dir()?;
- let include_files = if let Some(include) = &files.include {
- Cow::Borrowed(include)
- } else {
- Cow::Owned(vec![root_path.clone()])
- };
- for path in include_files.iter() {
- let path = path.to_string_lossy();
- let lowercase_path = path.to_lowercase();
- if lowercase_path.starts_with("http://")
- || lowercase_path.starts_with("https://")
- {
- let url = ModuleSpecifier::parse(&path)?;
- prepared.push(url);
- continue;
- }
- let p = if lowercase_path.starts_with("file://") {
- specifier_to_file_path(&ModuleSpecifier::parse(&path)?)?
- } else {
- root_path.join(path.as_ref())
- };
- let p = normalize_path(p);
- if p.is_dir() {
- let test_files = file_collector.collect_files(Some(&[p]))?;
- let mut test_files_as_urls = test_files
- .iter()
- .map(|f| ModuleSpecifier::from_file_path(f).unwrap())
- .collect::<Vec<ModuleSpecifier>>();
-
- test_files_as_urls.sort();
- prepared.extend(test_files_as_urls);
- } else {
- let url = ModuleSpecifier::from_file_path(p).unwrap();
- prepared.push(url);
+ // break out the remote specifiers
+ if let Some(include_mut) = &mut files.include {
+ let includes = std::mem::take(include_mut);
+ let path_or_patterns = includes.into_path_or_patterns();
+ let mut result = Vec::with_capacity(path_or_patterns.len());
+ for path_or_pattern in path_or_patterns {
+ match path_or_pattern {
+ PathOrPattern::Path(path) => {
+ // todo(dsherret): we should improve this to not store URLs in a PathBuf
+ let path_str = path.to_string_lossy();
+ let lowercase_path = path_str.to_lowercase();
+ if lowercase_path.starts_with("http://")
+ || lowercase_path.starts_with("https://")
+ {
+ // take out the url
+ let url = ModuleSpecifier::parse(&path_str)
+ .with_context(|| format!("Invalid URL '{}'", path_str))?;
+ prepared.push(url);
+ } else if lowercase_path.starts_with("file://") {
+ let url = ModuleSpecifier::parse(&path_str)
+ .with_context(|| format!("Invalid URL '{}'", path_str))?;
+ let p = specifier_to_file_path(&url)?;
+ if p.is_dir() {
+ result.push(PathOrPattern::Path(p));
+ } else {
+ prepared.push(url)
+ }
+ } else if path.is_dir() {
+ result.push(PathOrPattern::Path(path));
+ } else if !files.exclude.matches_path(&path) {
+ let url = ModuleSpecifier::from_file_path(&path)
+ .map_err(|_| anyhow!("Invalid file path '{}'", path.display()))?;
+ prepared.push(url);
+ }
+ }
+ PathOrPattern::Pattern(pattern) => {
+ // add it back
+ result.push(PathOrPattern::Pattern(pattern));
+ }
+ }
}
+ *include_mut = PathOrPatternSet::new(result);
}
+ let collected_files = FileCollector::new(predicate)
+ .ignore_git_folder()
+ .ignore_node_modules()
+ .ignore_vendor_folder()
+ .collect_file_patterns(files)?;
+ let mut collected_files_as_urls = collected_files
+ .iter()
+ .map(|f| ModuleSpecifier::from_file_path(f).unwrap())
+ .collect::<Vec<ModuleSpecifier>>();
+
+ collected_files_as_urls.sort();
+ prepared.extend(collected_files_as_urls);
+
Ok(prepared)
}
@@ -812,18 +823,29 @@ mod tests {
let ignore_dir_files = ["g.d.ts", ".gitignore"];
create_files(&ignore_dir_path, &ignore_dir_files);
- let file_collector = FileCollector::new(|path| {
+ let file_patterns = FilePatterns {
+ include: Some(
+ PathOrPatternSet::from_absolute_paths(
+ vec![root_dir_path.to_path_buf()],
+ )
+ .unwrap(),
+ ),
+ exclude: PathOrPatternSet::from_absolute_paths(vec![
+ ignore_dir_path.to_path_buf()
+ ])
+ .unwrap(),
+ };
+ let file_collector = FileCollector::new(|path, _| {
// exclude dotfiles
path
.file_name()
.and_then(|f| f.to_str())
.map(|f| !f.starts_with('.'))
.unwrap_or(false)
- })
- .add_ignore_paths(&[ignore_dir_path.to_path_buf()]);
+ });
let result = file_collector
- .collect_files(Some(&[root_dir_path.to_path_buf()]))
+ .collect_file_patterns(file_patterns.clone())
.unwrap();
let expected = [
"README.md",
@@ -850,7 +872,7 @@ mod tests {
.ignore_node_modules()
.ignore_vendor_folder();
let result = file_collector
- .collect_files(Some(&[root_dir_path.to_path_buf()]))
+ .collect_file_patterns(file_patterns.clone())
.unwrap();
let expected = [
"README.md",
@@ -869,12 +891,20 @@ mod tests {
assert_eq!(file_names, expected);
// test opting out of ignoring by specifying the dir
- let result = file_collector
- .collect_files(Some(&[
- root_dir_path.to_path_buf(),
- root_dir_path.to_path_buf().join("child/node_modules/"),
- ]))
- .unwrap();
+ let file_patterns = FilePatterns {
+ include: Some(
+ PathOrPatternSet::from_absolute_paths(vec![
+ root_dir_path.to_path_buf(),
+ root_dir_path.to_path_buf().join("child/node_modules/"),
+ ])
+ .unwrap(),
+ ),
+ exclude: PathOrPatternSet::from_absolute_paths(vec![
+ ignore_dir_path.to_path_buf()
+ ])
+ .unwrap(),
+ };
+ let result = file_collector.collect_file_patterns(file_patterns).unwrap();
let expected = [
"README.md",
"a.ts",
@@ -930,7 +960,7 @@ mod tests {
let ignore_dir_files = ["g.d.ts", ".gitignore"];
create_files(&ignore_dir_path, &ignore_dir_files);
- let predicate = |path: &Path| {
+ let predicate = |path: &Path, _: &FilePatterns| {
// exclude dotfiles
path
.file_name()
@@ -940,38 +970,46 @@ mod tests {
};
let result = collect_specifiers(
- &FilesConfig {
- include: Some(vec![
- PathBuf::from("http://localhost:8080"),
- root_dir_path.to_path_buf(),
- PathBuf::from("https://localhost:8080".to_string()),
- ]),
- exclude: vec![ignore_dir_path.to_path_buf()],
+ FilePatterns {
+ include: Some(
+ PathOrPatternSet::from_absolute_paths(vec![
+ PathBuf::from("http://localhost:8080"),
+ root_dir_path.to_path_buf(),
+ PathBuf::from("https://localhost:8080".to_string()),
+ ])
+ .unwrap(),
+ ),
+ exclude: PathOrPatternSet::from_absolute_paths(vec![
+ ignore_dir_path.to_path_buf()
+ ])
+ .unwrap(),
},
predicate,
)
.unwrap();
- let root_dir_url =
- ModuleSpecifier::from_file_path(root_dir_path.canonicalize())
- .unwrap()
- .to_string();
- let expected: Vec<ModuleSpecifier> = [
- "http://localhost:8080",
- &format!("{root_dir_url}/a.ts"),
- &format!("{root_dir_url}/b.js"),
- &format!("{root_dir_url}/c.tsx"),
- &format!("{root_dir_url}/child/README.md"),
- &format!("{root_dir_url}/child/e.mjs"),
- &format!("{root_dir_url}/child/f.mjsx"),
- &format!("{root_dir_url}/d.jsx"),
- "https://localhost:8080",
- ]
- .iter()
- .map(|f| ModuleSpecifier::parse(f).unwrap())
- .collect::<Vec<_>>();
+ let root_dir_url = ModuleSpecifier::from_file_path(&root_dir_path)
+ .unwrap()
+ .to_string();
+ let expected = vec![
+ "http://localhost:8080/".to_string(),
+ "https://localhost:8080/".to_string(),
+ format!("{root_dir_url}/a.ts"),
+ format!("{root_dir_url}/b.js"),
+ format!("{root_dir_url}/c.tsx"),
+ format!("{root_dir_url}/child/README.md"),
+ format!("{root_dir_url}/child/e.mjs"),
+ format!("{root_dir_url}/child/f.mjsx"),
+ format!("{root_dir_url}/d.jsx"),
+ ];
- assert_eq!(result, expected);
+ assert_eq!(
+ result
+ .into_iter()
+ .map(|s| s.to_string())
+ .collect::<Vec<_>>(),
+ expected
+ );
let scheme = if cfg!(target_os = "windows") {
"file:///"
@@ -979,28 +1017,34 @@ mod tests {
"file://"
};
let result = collect_specifiers(
- &FilesConfig {
- include: Some(vec![PathBuf::from(format!(
- "{}{}",
- scheme,
- root_dir_path.join("child").to_string().replace('\\', "/")
- ))]),
- exclude: vec![],
+ FilePatterns {
+ include: Some(
+ PathOrPatternSet::from_absolute_paths(vec![PathBuf::from(format!(
+ "{}{}",
+ scheme,
+ root_dir_path.join("child").to_string().replace('\\', "/")
+ ))])
+ .unwrap(),
+ ),
+ exclude: Default::default(),
},
predicate,
)
.unwrap();
- let expected: Vec<ModuleSpecifier> = [
- &format!("{root_dir_url}/child/README.md"),
- &format!("{root_dir_url}/child/e.mjs"),
- &format!("{root_dir_url}/child/f.mjsx"),
- ]
- .iter()
- .map(|f| ModuleSpecifier::parse(f).unwrap())
- .collect::<Vec<_>>();
+ let expected = vec![
+ format!("{root_dir_url}/child/README.md"),
+ format!("{root_dir_url}/child/e.mjs"),
+ format!("{root_dir_url}/child/f.mjsx"),
+ ];
- assert_eq!(result, expected);
+ assert_eq!(
+ result
+ .into_iter()
+ .map(|s| s.to_string())
+ .collect::<Vec<_>>(),
+ expected
+ );
}
#[tokio::test]