diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2022-12-07 13:10:10 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-07 13:10:10 -0500 |
commit | 9c1ab39e19073501618947ffa370ba59b04ec6cc (patch) | |
tree | ba729bae7d261bf38ede4179c98fb5e17928b8ea /cli | |
parent | f4385866f89e0abd3f5f1b0281abf00f1c562be9 (diff) |
feat: ignore `node_modules` and `.git` folders when collecting files everywhere (#16862)
We currently only do this for fmt. This makes it so they're excluded by
default, but you can still opt into these directories by explicitly
specifying them.
Diffstat (limited to 'cli')
-rw-r--r-- | cli/tools/coverage/mod.rs | 10 | ||||
-rw-r--r-- | cli/tools/fmt.rs | 55 | ||||
-rw-r--r-- | cli/tools/lint.rs | 54 | ||||
-rw-r--r-- | cli/util/fs.rs | 201 |
4 files changed, 212 insertions, 108 deletions
diff --git a/cli/tools/coverage/mod.rs b/cli/tools/coverage/mod.rs index f19cdfa3f..aacaf3d83 100644 --- a/cli/tools/coverage/mod.rs +++ b/cli/tools/coverage/mod.rs @@ -6,7 +6,7 @@ use crate::colors; use crate::emit::get_source_hash; use crate::proc_state::ProcState; use crate::tools::fmt::format_json; -use crate::util::fs::collect_files; +use crate::util::fs::FileCollector; use crate::util::text_encoding::source_map_from_code; use deno_ast::MediaType; @@ -558,9 +558,13 @@ fn collect_coverages( ignore: Vec<PathBuf>, ) -> Result<Vec<ScriptCoverage>, AnyError> { let mut coverages: Vec<ScriptCoverage> = Vec::new(); - let file_paths = collect_files(&files, &ignore, |file_path| { + let file_paths = FileCollector::new(|file_path| { file_path.extension().map_or(false, |ext| ext == "json") - })?; + }) + .ignore_git_folder() + .ignore_node_modules() + .add_ignore_paths(&ignore) + .collect_files(&files)?; for file_path in file_paths { let json = fs::read_to_string(file_path.as_path())?; diff --git a/cli/tools/fmt.rs b/cli/tools/fmt.rs index 721937b8d..7b5797d88 100644 --- a/cli/tools/fmt.rs +++ b/cli/tools/fmt.rs @@ -15,7 +15,7 @@ use crate::colors; use crate::util::diff::diff; use crate::util::file_watcher; use crate::util::file_watcher::ResolutionResult; -use crate::util::fs::collect_files; +use crate::util::fs::FileCollector; use crate::util::path::get_extension; use crate::util::path::specifier_to_file_path; use crate::util::text_encoding; @@ -92,17 +92,11 @@ pub async fn format( maybe_fmt_config.map(|c| c.options).unwrap_or_default(), ); - let fmt_predicate = |path: &Path| { - is_supported_ext_fmt(path) - && !contains_git(path) - && !contains_node_modules(path) - }; - let resolver = |changed: Option<Vec<PathBuf>>| { let files_changed = changed.is_some(); - let result = collect_files(&include_files, &exclude_files, fmt_predicate) - .map(|files| { + let result = + collect_fmt_files(&include_files, &exclude_files).map(|files| { let refmt_files = if let Some(paths) = changed { if check { files @@ -164,8 +158,8 @@ pub async fn format( ) .await?; } else { - let files = collect_files(&include_files, &exclude_files, fmt_predicate) - .and_then(|files| { + let files = + collect_fmt_files(&include_files, &exclude_files).and_then(|files| { if files.is_empty() { Err(generic_error("No target files found.")) } else { @@ -178,6 +172,17 @@ pub async fn format( Ok(()) } +fn collect_fmt_files( + include_files: &[PathBuf], + exclude_files: &[PathBuf], +) -> Result<Vec<PathBuf>, AnyError> { + FileCollector::new(is_supported_ext_fmt) + .ignore_git_folder() + .ignore_node_modules() + .add_ignore_paths(exclude_files) + .collect_files(include_files) +} + /// Formats markdown (using <https://github.com/dprint/dprint-plugin-markdown>) and its code blocks /// (ts/tsx, js/jsx). fn format_markdown( @@ -734,14 +739,6 @@ fn is_supported_ext_fmt(path: &Path) -> bool { } } -fn contains_git(path: &Path) -> bool { - path.components().any(|c| c.as_os_str() == ".git") -} - -fn contains_node_modules(path: &Path) -> bool { - path.components().any(|c| c.as_os_str() == "node_modules") -} - #[cfg(test)] mod test { use super::*; @@ -774,26 +771,6 @@ mod test { } #[test] - fn test_is_located_in_git() { - assert!(contains_git(Path::new("test/.git"))); - assert!(contains_git(Path::new(".git/bad.json"))); - assert!(contains_git(Path::new("test/.git/bad.json"))); - assert!(!contains_git(Path::new("test/bad.git/bad.json"))); - } - - #[test] - fn test_is_located_in_node_modules() { - assert!(contains_node_modules(Path::new("test/node_modules"))); - assert!(contains_node_modules(Path::new("node_modules/bad.json"))); - assert!(contains_node_modules(Path::new( - "test/node_modules/bad.json" - ))); - assert!(!contains_node_modules(Path::new( - "test/bad.node_modules/bad.json" - ))); - } - - #[test] #[should_panic(expected = "Formatting not stable. Bailed after 5 tries.")] fn test_format_ensure_stable_unstable_format() { format_ensure_stable( diff --git a/cli/tools/lint.rs b/cli/tools/lint.rs index 2f7cd5111..1b2487e4c 100644 --- a/cli/tools/lint.rs +++ b/cli/tools/lint.rs @@ -14,7 +14,7 @@ use crate::proc_state::ProcState; use crate::tools::fmt::run_parallelized; use crate::util::file_watcher; use crate::util::file_watcher::ResolutionResult; -use crate::util::fs::collect_files; +use crate::util::fs::FileCollector; use crate::util::path::is_supported_ext; use crate::util::path::specifier_to_file_path; use deno_ast::MediaType; @@ -143,19 +143,17 @@ pub async fn lint(flags: Flags, lint_flags: LintFlags) -> Result<(), AnyError> { let resolver = |changed: Option<Vec<PathBuf>>| { let files_changed = changed.is_some(); let result = - collect_files(&include_files, &exclude_files, is_supported_ext).map( - |files| { - if let Some(paths) = changed { - files - .iter() - .any(|path| paths.contains(path)) - .then_some(files) - .unwrap_or_else(|| [].to_vec()) - } else { - files - } - }, - ); + collect_lint_files(&include_files, &exclude_files).map(|files| { + if let Some(paths) = changed { + files + .iter() + .any(|path| paths.contains(path)) + .then_some(files) + .unwrap_or_else(|| [].to_vec()) + } else { + files + } + }); let paths_to_watch = include_files.clone(); @@ -251,15 +249,14 @@ pub async fn lint(flags: Flags, lint_flags: LintFlags) -> Result<(), AnyError> { ); reporter_lock.lock().unwrap().close(1); } else { - let target_files = - collect_files(&include_files, &exclude_files, is_supported_ext) - .and_then(|files| { - if files.is_empty() { - Err(generic_error("No target files found.")) - } else { - Ok(files) - } - })?; + let target_files = collect_lint_files(&include_files, &exclude_files) + .and_then(|files| { + if files.is_empty() { + Err(generic_error("No target files found.")) + } else { + Ok(files) + } + })?; debug!("Found {} files", target_files.len()); operation(target_files).await?; }; @@ -272,6 +269,17 @@ pub async fn lint(flags: Flags, lint_flags: LintFlags) -> Result<(), AnyError> { Ok(()) } +fn collect_lint_files( + include_files: &[PathBuf], + exclude_files: &[PathBuf], +) -> Result<Vec<PathBuf>, AnyError> { + FileCollector::new(is_supported_ext) + .ignore_git_folder() + .ignore_node_modules() + .add_ignore_paths(exclude_files) + .collect_files(include_files) +} + pub fn print_rules_list(json: bool) { let lint_rules = rules::get_recommended_rules(); diff --git a/cli/util/fs.rs b/cli/util/fs.rs index 35cdae4fa..40dfafdd8 100644 --- a/cli/util/fs.rs +++ b/cli/util/fs.rs @@ -165,53 +165,104 @@ pub fn resolve_from_cwd(path: &Path) -> Result<PathBuf, AnyError> { /// Collects file paths that satisfy the given predicate, by recursively walking `files`. /// If the walker visits a path that is listed in `ignore`, it skips descending into the directory. -pub fn collect_files<P>( - files: &[PathBuf], - ignore: &[PathBuf], - predicate: P, -) -> Result<Vec<PathBuf>, AnyError> -where - P: Fn(&Path) -> bool, -{ - let mut target_files = Vec::new(); - - // retain only the paths which exist and ignore the rest - let canonicalized_ignore: Vec<PathBuf> = ignore - .iter() - .filter_map(|i| canonicalize_path(i).ok()) - .collect(); +pub struct FileCollector<TFilter: Fn(&Path) -> bool> { + canonicalized_ignore: Vec<PathBuf>, + file_filter: TFilter, + ignore_git_folder: bool, + ignore_node_modules: bool, +} - for file in files { - for entry in WalkDir::new(file) - .into_iter() - .filter_entry(|e| { - canonicalize_path(e.path()).map_or(false, |c| { - !canonicalized_ignore.iter().any(|i| c.starts_with(i)) - }) - }) - .filter_map(|e| match e { - Ok(e) if !e.file_type().is_dir() && predicate(e.path()) => Some(e), - _ => None, - }) - { - target_files.push(canonicalize_path(entry.path())?) +impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> { + pub fn new(file_filter: TFilter) -> Self { + Self { + canonicalized_ignore: Default::default(), + file_filter, + ignore_git_folder: false, + ignore_node_modules: false, } } + pub fn add_ignore_paths(mut self, paths: &[PathBuf]) -> Self { + // retain only the paths which exist and ignore the rest + self + .canonicalized_ignore + .extend(paths.iter().filter_map(|i| canonicalize_path(i).ok())); + self + } + + pub fn ignore_node_modules(mut self) -> Self { + self.ignore_node_modules = true; + self + } + + pub fn ignore_git_folder(mut self) -> Self { + self.ignore_git_folder = true; + self + } - Ok(target_files) + pub fn collect_files( + &self, + files: &[PathBuf], + ) -> Result<Vec<PathBuf>, AnyError> { + let mut target_files = Vec::new(); + for file in files { + if let Ok(file) = canonicalize_path(file) { + // use an iterator like this in order to minimize the number of file system operations + let mut iterator = WalkDir::new(&file).into_iter(); + loop { + let e = match iterator.next() { + None => break, + Some(Err(_)) => continue, + Some(Ok(entry)) => entry, + }; + let file_type = e.file_type(); + let is_dir = file_type.is_dir(); + if let Ok(c) = canonicalize_path(e.path()) { + if self.canonicalized_ignore.iter().any(|i| c.starts_with(i)) { + if is_dir { + iterator.skip_current_dir(); + } + } else if is_dir { + let should_ignore_dir = c + .file_name() + .map(|dir_name| { + let dir_name = dir_name.to_string_lossy().to_lowercase(); + let is_ignored_file = self.ignore_node_modules + && dir_name == "node_modules" + || self.ignore_git_folder && dir_name == ".git"; + // allow the user to opt out of ignoring by explicitly specifying the dir + file != c && is_ignored_file + }) + .unwrap_or(false); + if should_ignore_dir { + iterator.skip_current_dir(); + } + } else if (self.file_filter)(e.path()) { + target_files.push(c); + } + } else if is_dir { + // failed canonicalizing, so skip it + iterator.skip_current_dir(); + } + } + } + } + Ok(target_files) + } } /// Collects module specifiers that satisfy the given predicate as a file path, by recursively walking `include`. /// Specifiers that start with http and https are left intact. -pub fn collect_specifiers<P>( +/// Note: This ignores all .git and node_modules folders. +pub fn collect_specifiers( include: Vec<String>, ignore: &[PathBuf], - predicate: P, -) -> Result<Vec<ModuleSpecifier>, AnyError> -where - P: Fn(&Path) -> bool, -{ + predicate: impl Fn(&Path) -> bool, +) -> Result<Vec<ModuleSpecifier>, AnyError> { let mut prepared = vec![]; + let file_collector = FileCollector::new(predicate) + .add_ignore_paths(ignore) + .ignore_git_folder() + .ignore_node_modules(); let root_path = current_dir()?; for path in include { @@ -231,7 +282,7 @@ where }; let p = normalize_path(&p); if p.is_dir() { - let test_files = collect_files(&[p], ignore, &predicate).unwrap(); + let test_files = file_collector.collect_files(&[p])?; let mut test_files_as_urls = test_files .iter() .map(|f| ModuleSpecifier::from_file_path(f).unwrap()) @@ -407,6 +458,7 @@ pub fn dir_size(path: &Path) -> std::io::Result<u64> { #[cfg(test)] mod tests { use super::*; + use pretty_assertions::assert_eq; use test_util::TempDir; #[test] @@ -466,6 +518,10 @@ mod tests { // ├── a.ts // ├── b.js // ├── child + // | ├── node_modules + // | | └── node_modules.js + // | ├── git + // | | └── git.js // │ ├── e.mjs // │ ├── f.mjsx // │ ├── .foo.TS @@ -486,31 +542,90 @@ mod tests { let child_dir_files = ["e.mjs", "f.mjsx", ".foo.TS", "README.md"]; create_files(&child_dir_path, &child_dir_files); + t.create_dir_all("dir.ts/child/node_modules"); + t.write("dir.ts/child/node_modules/node_modules.js", ""); + t.create_dir_all("dir.ts/child/.git"); + t.write("dir.ts/child/.git/git.js", ""); + let ignore_dir_path = root_dir_path.join("ignore"); let ignore_dir_files = ["g.d.ts", ".gitignore"]; create_files(&ignore_dir_path, &ignore_dir_files); - let result = collect_files(&[root_dir_path], &[ignore_dir_path], |path| { + let file_collector = FileCollector::new(|path| { // exclude dotfiles path .file_name() .and_then(|f| f.to_str()) .map_or(false, |f| !f.starts_with('.')) }) - .unwrap(); + .add_ignore_paths(&[ignore_dir_path]); + + let result = file_collector + .collect_files(&[root_dir_path.clone()]) + .unwrap(); + let expected = [ + "README.md", + "a.ts", + "b.js", + "c.tsx", + "d.jsx", + "e.mjs", + "f.mjsx", + "git.js", + "node_modules.js", + ]; + let mut file_names = result + .into_iter() + .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) + .collect::<Vec<_>>(); + file_names.sort(); + assert_eq!(file_names, expected); + + // test ignoring the .git and node_modules folder + let file_collector = + file_collector.ignore_git_folder().ignore_node_modules(); + let result = file_collector + .collect_files(&[root_dir_path.clone()]) + .unwrap(); let expected = [ + "README.md", "a.ts", "b.js", + "c.tsx", + "d.jsx", "e.mjs", "f.mjsx", + ]; + let mut file_names = result + .into_iter() + .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) + .collect::<Vec<_>>(); + file_names.sort(); + assert_eq!(file_names, expected); + + // test opting out of ignoring by specifying the dir + let result = file_collector + .collect_files(&[ + root_dir_path.clone(), + root_dir_path.join("child/node_modules/"), + ]) + .unwrap(); + let expected = [ "README.md", + "a.ts", + "b.js", "c.tsx", "d.jsx", + "e.mjs", + "f.mjsx", + "node_modules.js", ]; - for e in expected.iter() { - assert!(result.iter().any(|r| r.ends_with(e))); - } - assert_eq!(result.len(), expected.len()); + let mut file_names = result + .into_iter() + .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) + .collect::<Vec<_>>(); + file_names.sort(); + assert_eq!(file_names, expected); } #[test] |