Diffstat (limited to 'cli/util/fs.rs')
-rw-r--r-- | cli/util/fs.rs | 358 |
1 file changed, 201 insertions, 157 deletions
diff --git a/cli/util/fs.rs b/cli/util/fs.rs
index f9fe9424f..86b17754b 100644
--- a/cli/util/fs.rs
+++ b/cli/util/fs.rs
@@ -1,5 +1,6 @@
 // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

+use deno_core::anyhow::anyhow;
 use deno_core::anyhow::Context;
 use deno_core::error::AnyError;
 pub use deno_core::normalize_path;
@@ -8,7 +9,7 @@ use deno_core::ModuleSpecifier;
 use deno_runtime::deno_crypto::rand;
 use deno_runtime::deno_fs::FileSystem;
 use deno_runtime::deno_node::PathClean;
-use std::borrow::Cow;
+use std::collections::HashSet;
 use std::env::current_dir;
 use std::fmt::Write as FmtWrite;
 use std::fs::OpenOptions;
@@ -21,11 +22,13 @@ use std::sync::Arc;
 use std::time::Duration;
 use walkdir::WalkDir;

-use crate::args::FilesConfig;
 use crate::util::progress_bar::ProgressBar;
 use crate::util::progress_bar::ProgressBarStyle;
 use crate::util::progress_bar::ProgressMessagePrompt;

+use super::glob::FilePatterns;
+use super::glob::PathOrPattern;
+use super::glob::PathOrPatternSet;
 use super::path::specifier_to_file_path;

 /// Writes the file to the file system at a temporary path, then
@@ -244,18 +247,16 @@ pub fn resolve_from_cwd(path: &Path) -> Result<PathBuf, AnyError> {

 /// Collects file paths that satisfy the given predicate, by recursively walking `files`.
 /// If the walker visits a path that is listed in `ignore`, it skips descending into the directory.
-pub struct FileCollector<TFilter: Fn(&Path) -> bool> {
-  canonicalized_ignore: Vec<PathBuf>,
+pub struct FileCollector<TFilter: Fn(&Path, &FilePatterns) -> bool> {
   file_filter: TFilter,
   ignore_git_folder: bool,
   ignore_node_modules: bool,
   ignore_vendor_folder: bool,
 }

-impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
+impl<TFilter: Fn(&Path, &FilePatterns) -> bool> FileCollector<TFilter> {
   pub fn new(file_filter: TFilter) -> Self {
     Self {
-      canonicalized_ignore: Default::default(),
       file_filter,
       ignore_git_folder: false,
       ignore_node_modules: false,
@@ -263,14 +264,6 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
     }
   }

-  pub fn add_ignore_paths(mut self, paths: &[PathBuf]) -> Self {
-    // retain only the paths which exist and ignore the rest
-    self
-      .canonicalized_ignore
-      .extend(paths.iter().filter_map(|i| canonicalize_path(i).ok()));
-    self
-  }
-
   pub fn ignore_node_modules(mut self) -> Self {
     self.ignore_node_modules = true;
     self
@@ -286,58 +279,62 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
     self
   }

-  pub fn collect_files(
+  pub fn collect_file_patterns(
     &self,
-    files: Option<&[PathBuf]>,
+    file_patterns: FilePatterns,
   ) -> Result<Vec<PathBuf>, AnyError> {
     let mut target_files = Vec::new();
-    let files = if let Some(files) = files {
-      Cow::Borrowed(files)
-    } else {
-      Cow::Owned(vec![PathBuf::from(".")])
-    };
-    for file in files.iter() {
-      if let Ok(file) = canonicalize_path(file) {
-        // use an iterator like this in order to minimize the number of file system operations
-        let mut iterator = WalkDir::new(&file).into_iter();
-        loop {
-          let e = match iterator.next() {
-            None => break,
-            Some(Err(_)) => continue,
-            Some(Ok(entry)) => entry,
-          };
-          let file_type = e.file_type();
-          let is_dir = file_type.is_dir();
-          if let Ok(c) = canonicalize_path(e.path()) {
-            if self.canonicalized_ignore.iter().any(|i| c.starts_with(i)) {
-              if is_dir {
-                iterator.skip_current_dir();
-              }
-            } else if is_dir {
-              let should_ignore_dir = c
-                .file_name()
-                .map(|dir_name| {
-                  let dir_name = dir_name.to_string_lossy().to_lowercase();
-                  let is_ignored_file = match dir_name.as_str() {
-                    "node_modules" => self.ignore_node_modules,
-                    "vendor" => self.ignore_vendor_folder,
-                    ".git" => self.ignore_git_folder,
-                    _ => false,
-                  };
-                  // allow the user to opt out of ignoring by explicitly specifying the dir
-                  file != c && is_ignored_file
-                })
-                .unwrap_or(false);
-              if should_ignore_dir {
-                iterator.skip_current_dir();
-              }
-            } else if (self.file_filter)(e.path()) {
-              target_files.push(c);
-            }
-          } else if is_dir {
-            // failed canonicalizing, so skip it
+    let mut visited_paths = HashSet::new();
+    let file_patterns_by_base = file_patterns.split_by_base();
+    for (base, file_patterns) in file_patterns_by_base {
+      let file = normalize_path(base);
+      // use an iterator in order to minimize the number of file system operations
+      let mut iterator = WalkDir::new(&file)
+        .follow_links(false) // the default, but be explicit
+        .into_iter();
+      loop {
+        let e = match iterator.next() {
+          None => break,
+          Some(Err(_)) => continue,
+          Some(Ok(entry)) => entry,
+        };
+        let file_type = e.file_type();
+        let is_dir = file_type.is_dir();
+        let c = e.path().to_path_buf();
+        if file_patterns.exclude.matches_path(&c)
+          || !is_dir
+            && !file_patterns
+              .include
+              .as_ref()
+              .map(|i| i.matches_path(&c))
+              .unwrap_or(true)
+        {
+          if is_dir {
             iterator.skip_current_dir();
           }
+        } else if is_dir {
+          let should_ignore_dir = c
+            .file_name()
+            .map(|dir_name| {
+              let dir_name = dir_name.to_string_lossy().to_lowercase();
+              let is_ignored_file = match dir_name.as_str() {
+                "node_modules" => self.ignore_node_modules,
+                "vendor" => self.ignore_vendor_folder,
+                ".git" => self.ignore_git_folder,
+                _ => false,
+              };
+              // allow the user to opt out of ignoring by explicitly specifying the dir
+              file != c && is_ignored_file
+            })
+            .unwrap_or(false)
+            || !visited_paths.insert(c.clone());
+          if should_ignore_dir {
+            iterator.skip_current_dir();
+          }
+        } else if (self.file_filter)(&c, &file_patterns)
+          && visited_paths.insert(c.clone())
+        {
+          target_files.push(c);
         }
       }
     }
@@ -349,54 +346,68 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
 /// Specifiers that start with http and https are left intact.
 /// Note: This ignores all .git and node_modules folders.
 pub fn collect_specifiers(
-  files: &FilesConfig,
-  predicate: impl Fn(&Path) -> bool,
+  mut files: FilePatterns,
+  predicate: impl Fn(&Path, &FilePatterns) -> bool,
 ) -> Result<Vec<ModuleSpecifier>, AnyError> {
   let mut prepared = vec![];
-  let file_collector = FileCollector::new(predicate)
-    .add_ignore_paths(&files.exclude)
-    .ignore_git_folder()
-    .ignore_node_modules()
-    .ignore_vendor_folder();
-
-  let root_path = current_dir()?;
-  let include_files = if let Some(include) = &files.include {
-    Cow::Borrowed(include)
-  } else {
-    Cow::Owned(vec![root_path.clone()])
-  };
-  for path in include_files.iter() {
-    let path = path.to_string_lossy();
-    let lowercase_path = path.to_lowercase();
-    if lowercase_path.starts_with("http://")
-      || lowercase_path.starts_with("https://")
-    {
-      let url = ModuleSpecifier::parse(&path)?;
-      prepared.push(url);
-      continue;
-    }
-    let p = if lowercase_path.starts_with("file://") {
-      specifier_to_file_path(&ModuleSpecifier::parse(&path)?)?
-    } else {
-      root_path.join(path.as_ref())
-    };
-    let p = normalize_path(p);
-    if p.is_dir() {
-      let test_files = file_collector.collect_files(Some(&[p]))?;
-      let mut test_files_as_urls = test_files
-        .iter()
-        .map(|f| ModuleSpecifier::from_file_path(f).unwrap())
-        .collect::<Vec<ModuleSpecifier>>();
-
-      test_files_as_urls.sort();
-      prepared.extend(test_files_as_urls);
-    } else {
-      let url = ModuleSpecifier::from_file_path(p).unwrap();
-      prepared.push(url);
+  // break out the remote specifiers
+  if let Some(include_mut) = &mut files.include {
+    let includes = std::mem::take(include_mut);
+    let path_or_patterns = includes.into_path_or_patterns();
+    let mut result = Vec::with_capacity(path_or_patterns.len());
+    for path_or_pattern in path_or_patterns {
+      match path_or_pattern {
+        PathOrPattern::Path(path) => {
+          // todo(dsherret): we should improve this to not store URLs in a PathBuf
+          let path_str = path.to_string_lossy();
+          let lowercase_path = path_str.to_lowercase();
+          if lowercase_path.starts_with("http://")
+            || lowercase_path.starts_with("https://")
+          {
+            // take out the url
+            let url = ModuleSpecifier::parse(&path_str)
+              .with_context(|| format!("Invalid URL '{}'", path_str))?;
+            prepared.push(url);
+          } else if lowercase_path.starts_with("file://") {
+            let url = ModuleSpecifier::parse(&path_str)
+              .with_context(|| format!("Invalid URL '{}'", path_str))?;
+            let p = specifier_to_file_path(&url)?;
+            if p.is_dir() {
+              result.push(PathOrPattern::Path(p));
+            } else {
+              prepared.push(url)
+            }
+          } else if path.is_dir() {
+            result.push(PathOrPattern::Path(path));
+          } else if !files.exclude.matches_path(&path) {
+            let url = ModuleSpecifier::from_file_path(&path)
+              .map_err(|_| anyhow!("Invalid file path '{}'", path.display()))?;
+            prepared.push(url);
+          }
+        }
+        PathOrPattern::Pattern(pattern) => {
+          // add it back
+          result.push(PathOrPattern::Pattern(pattern));
+        }
+      }
     }
+    *include_mut = PathOrPatternSet::new(result);
   }

+  let collected_files = FileCollector::new(predicate)
+    .ignore_git_folder()
+    .ignore_node_modules()
+    .ignore_vendor_folder()
+    .collect_file_patterns(files)?;
+  let mut collected_files_as_urls = collected_files
+    .iter()
+    .map(|f| ModuleSpecifier::from_file_path(f).unwrap())
+    .collect::<Vec<ModuleSpecifier>>();
+
+  collected_files_as_urls.sort();
+  prepared.extend(collected_files_as_urls);
+
   Ok(prepared)
 }
@@ -812,18 +823,29 @@ mod tests {
     let ignore_dir_files = ["g.d.ts", ".gitignore"];
     create_files(&ignore_dir_path, &ignore_dir_files);

-    let file_collector = FileCollector::new(|path| {
+    let file_patterns = FilePatterns {
+      include: Some(
+        PathOrPatternSet::from_absolute_paths(
+          vec![root_dir_path.to_path_buf()],
+        )
+        .unwrap(),
+      ),
+      exclude: PathOrPatternSet::from_absolute_paths(vec![
+        ignore_dir_path.to_path_buf()
+      ])
+      .unwrap(),
+    };
+    let file_collector = FileCollector::new(|path, _| {
       // exclude dotfiles
       path
         .file_name()
         .and_then(|f| f.to_str())
         .map(|f| !f.starts_with('.'))
         .unwrap_or(false)
-    })
-    .add_ignore_paths(&[ignore_dir_path.to_path_buf()]);
+    });

     let result = file_collector
-      .collect_files(Some(&[root_dir_path.to_path_buf()]))
+      .collect_file_patterns(file_patterns.clone())
       .unwrap();
     let expected = [
       "README.md",
@@ -850,7 +872,7 @@ mod tests {
       .ignore_node_modules()
       .ignore_vendor_folder();
     let result = file_collector
-      .collect_files(Some(&[root_dir_path.to_path_buf()]))
+      .collect_file_patterns(file_patterns.clone())
       .unwrap();
     let expected = [
       "README.md",
@@ -869,12 +891,20 @@ mod tests {
     assert_eq!(file_names, expected);

     // test opting out of ignoring by specifying the dir
-    let result = file_collector
-      .collect_files(Some(&[
-        root_dir_path.to_path_buf(),
-        root_dir_path.to_path_buf().join("child/node_modules/"),
-      ]))
-      .unwrap();
+    let file_patterns = FilePatterns {
+      include: Some(
+        PathOrPatternSet::from_absolute_paths(vec![
+          root_dir_path.to_path_buf(),
+          root_dir_path.to_path_buf().join("child/node_modules/"),
+        ])
+        .unwrap(),
+      ),
+      exclude: PathOrPatternSet::from_absolute_paths(vec![
+        ignore_dir_path.to_path_buf()
+      ])
+      .unwrap(),
+    };
+    let result = file_collector.collect_file_patterns(file_patterns).unwrap();
     let expected = [
       "README.md",
       "a.ts",
@@ -930,7 +960,7 @@ mod tests {
     let ignore_dir_files = ["g.d.ts", ".gitignore"];
     create_files(&ignore_dir_path, &ignore_dir_files);

-    let predicate = |path: &Path| {
+    let predicate = |path: &Path, _: &FilePatterns| {
       // exclude dotfiles
       path
         .file_name()
@@ -940,38 +970,46 @@ mod tests {
         .map(|f| !f.starts_with('.'))
         .unwrap_or(false)
     };

     let result = collect_specifiers(
-      &FilesConfig {
-        include: Some(vec![
-          PathBuf::from("http://localhost:8080"),
-          root_dir_path.to_path_buf(),
-          PathBuf::from("https://localhost:8080".to_string()),
-        ]),
-        exclude: vec![ignore_dir_path.to_path_buf()],
+      FilePatterns {
+        include: Some(
+          PathOrPatternSet::from_absolute_paths(vec![
+            PathBuf::from("http://localhost:8080"),
+            root_dir_path.to_path_buf(),
+            PathBuf::from("https://localhost:8080".to_string()),
+          ])
+          .unwrap(),
+        ),
+        exclude: PathOrPatternSet::from_absolute_paths(vec![
+          ignore_dir_path.to_path_buf()
+        ])
+        .unwrap(),
       },
       predicate,
     )
     .unwrap();

-    let root_dir_url =
-      ModuleSpecifier::from_file_path(root_dir_path.canonicalize())
-        .unwrap()
-        .to_string();
-    let expected: Vec<ModuleSpecifier> = [
-      "http://localhost:8080",
-      &format!("{root_dir_url}/a.ts"),
-      &format!("{root_dir_url}/b.js"),
-      &format!("{root_dir_url}/c.tsx"),
-      &format!("{root_dir_url}/child/README.md"),
-      &format!("{root_dir_url}/child/e.mjs"),
-      &format!("{root_dir_url}/child/f.mjsx"),
-      &format!("{root_dir_url}/d.jsx"),
-      "https://localhost:8080",
-    ]
-    .iter()
-    .map(|f| ModuleSpecifier::parse(f).unwrap())
-    .collect::<Vec<_>>();
+    let root_dir_url = ModuleSpecifier::from_file_path(&root_dir_path)
+      .unwrap()
+      .to_string();
+    let expected = vec![
+      "http://localhost:8080/".to_string(),
+      "https://localhost:8080/".to_string(),
+      format!("{root_dir_url}/a.ts"),
+      format!("{root_dir_url}/b.js"),
+      format!("{root_dir_url}/c.tsx"),
+      format!("{root_dir_url}/child/README.md"),
+      format!("{root_dir_url}/child/e.mjs"),
+      format!("{root_dir_url}/child/f.mjsx"),
+      format!("{root_dir_url}/d.jsx"),
+    ];

-    assert_eq!(result, expected);
+    assert_eq!(
+      result
+        .into_iter()
+        .map(|s| s.to_string())
+        .collect::<Vec<_>>(),
+      expected
+    );

     let scheme = if cfg!(target_os = "windows") {
       "file:///"
@@ -979,28 +1017,34 @@
       "file://"
     };
     let result = collect_specifiers(
-      &FilesConfig {
-        include: Some(vec![PathBuf::from(format!(
-          "{}{}",
-          scheme,
-          root_dir_path.join("child").to_string().replace('\\', "/")
-        ))]),
-        exclude: vec![],
+      FilePatterns {
+        include: Some(
+          PathOrPatternSet::from_absolute_paths(vec![PathBuf::from(format!(
+            "{}{}",
+            scheme,
+            root_dir_path.join("child").to_string().replace('\\', "/")
+          ))])
+          .unwrap(),
+        ),
+        exclude: Default::default(),
      },
       predicate,
     )
     .unwrap();

-    let expected: Vec<ModuleSpecifier> = [
-      &format!("{root_dir_url}/child/README.md"),
-      &format!("{root_dir_url}/child/e.mjs"),
-      &format!("{root_dir_url}/child/f.mjsx"),
-    ]
-    .iter()
-    .map(|f| ModuleSpecifier::parse(f).unwrap())
-    .collect::<Vec<_>>();
+    let expected = vec![
+      format!("{root_dir_url}/child/README.md"),
+      format!("{root_dir_url}/child/e.mjs"),
+      format!("{root_dir_url}/child/f.mjsx"),
+    ];

-    assert_eq!(result, expected);
+    assert_eq!(
+      result
+        .into_iter()
+        .map(|s| s.to_string())
+        .collect::<Vec<_>>(),
+      expected
+    );
   }

   #[tokio::test]
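For orientation, here is a minimal sketch of how a caller might drive the reworked API after this change, modeled on the test code in the diff above. The module paths (crate::util::fs, crate::util::glob), the helper name collect_ts_specifiers, and the /my/project/src include path are illustrative assumptions, not part of this commit:

use std::path::{Path, PathBuf};

use deno_core::error::AnyError;
use deno_core::ModuleSpecifier;

use crate::util::fs::collect_specifiers;
use crate::util::glob::{FilePatterns, PathOrPatternSet};

// Hypothetical helper: collect every .ts module specifier under an absolute path.
fn collect_ts_specifiers() -> Result<Vec<ModuleSpecifier>, AnyError> {
  let patterns = FilePatterns {
    // include/exclude are now PathOrPatternSet values instead of plain Vec<PathBuf>.
    include: Some(
      PathOrPatternSet::from_absolute_paths(vec![PathBuf::from(
        "/my/project/src",
      )])
      .unwrap(),
    ),
    exclude: Default::default(),
  };
  // The predicate now also receives the FilePatterns for the base being walked.
  collect_specifiers(patterns, |path: &Path, _patterns: &FilePatterns| {
    path.extension().and_then(|ext| ext.to_str()) == Some("ts")
  })
}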