diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2024-07-05 17:53:09 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-05 17:53:09 -0400 |
commit | 80df9aec1db449e6cc0f4513103aa442b8d43de3 (patch) | |
tree | e5a36781b8b75253b4896a2cdfd46116fde5af71 /cli/util | |
parent | d4d3a3c54f5e26dec0cc79e273dc488f8a47f2b3 (diff) |
refactor: move `FileCollector` to deno_config (#24433)
Diffstat (limited to 'cli/util')
-rw-r--r-- | cli/util/fs.rs | 339 | ||||
-rw-r--r-- | cli/util/gitignore.rs | 178 | ||||
-rw-r--r-- | cli/util/mod.rs | 1 | ||||
-rw-r--r-- | cli/util/sync/atomic_flag.rs | 35 | ||||
-rw-r--r-- | cli/util/sync/mod.rs | 4 |
5 files changed, 5 insertions, 552 deletions
diff --git a/cli/util/fs.rs b/cli/util/fs.rs index f33368d1a..c414abd59 100644 --- a/cli/util/fs.rs +++ b/cli/util/fs.rs @@ -1,8 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use std::collections::HashSet; use std::env::current_dir; -use std::fs::FileType; use std::fs::OpenOptions; use std::io::Error; use std::io::ErrorKind; @@ -11,11 +9,12 @@ use std::path::Path; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; -use walkdir::WalkDir; +use deno_config::glob::FileCollector; use deno_config::glob::FilePatterns; use deno_config::glob::PathOrPattern; use deno_config::glob::PathOrPatternSet; +use deno_config::glob::WalkEntry; use deno_core::anyhow::anyhow; use deno_core::anyhow::Context; use deno_core::error::AnyError; @@ -25,8 +24,6 @@ use deno_core::ModuleSpecifier; use deno_runtime::deno_fs::FileSystem; use deno_runtime::deno_node::PathClean; -use crate::util::gitignore::DirGitIgnores; -use crate::util::gitignore::GitIgnoreTree; use crate::util::path::get_atomic_file_path; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; @@ -270,192 +267,6 @@ pub fn resolve_from_cwd(path: &Path) -> Result<PathBuf, AnyError> { Ok(normalize_path(resolved_path)) } -#[derive(Debug, Clone)] -pub struct WalkEntry<'a> { - pub path: &'a Path, - pub file_type: &'a FileType, - pub patterns: &'a FilePatterns, -} - -/// Collects file paths that satisfy the given predicate, by recursively walking `files`. -/// If the walker visits a path that is listed in `ignore`, it skips descending into the directory. -pub struct FileCollector<TFilter: Fn(WalkEntry) -> bool> { - file_filter: TFilter, - ignore_git_folder: bool, - ignore_node_modules: bool, - vendor_folder: Option<PathBuf>, - use_gitignore: bool, -} - -impl<TFilter: Fn(WalkEntry) -> bool> FileCollector<TFilter> { - pub fn new(file_filter: TFilter) -> Self { - Self { - file_filter, - ignore_git_folder: false, - ignore_node_modules: false, - vendor_folder: None, - use_gitignore: false, - } - } - - pub fn ignore_node_modules(mut self) -> Self { - self.ignore_node_modules = true; - self - } - - pub fn set_vendor_folder(mut self, vendor_folder: Option<PathBuf>) -> Self { - self.vendor_folder = vendor_folder; - self - } - - pub fn ignore_git_folder(mut self) -> Self { - self.ignore_git_folder = true; - self - } - - pub fn use_gitignore(mut self) -> Self { - self.use_gitignore = true; - self - } - - pub fn collect_file_patterns( - &self, - file_patterns: FilePatterns, - ) -> Result<Vec<PathBuf>, AnyError> { - fn is_pattern_matched( - maybe_git_ignore: Option<&DirGitIgnores>, - path: &Path, - is_dir: bool, - file_patterns: &FilePatterns, - ) -> bool { - use deno_config::glob::FilePatternsMatch; - - let path_kind = match is_dir { - true => deno_config::glob::PathKind::Directory, - false => deno_config::glob::PathKind::File, - }; - match file_patterns.matches_path_detail(path, path_kind) { - FilePatternsMatch::Passed => { - // check gitignore - let is_gitignored = maybe_git_ignore - .as_ref() - .map(|git_ignore| git_ignore.is_ignored(path, is_dir)) - .unwrap_or(false); - !is_gitignored - } - FilePatternsMatch::PassedOptedOutExclude => true, - FilePatternsMatch::Excluded => false, - } - } - - let mut maybe_git_ignores = if self.use_gitignore { - // Override explicitly specified include paths in the - // .gitignore file. This does not apply to globs because - // that is way too complicated to reason about. - let include_paths = file_patterns - .include - .as_ref() - .map(|include| { - include - .inner() - .iter() - .filter_map(|path_or_pattern| { - if let PathOrPattern::Path(p) = path_or_pattern { - Some(p.clone()) - } else { - None - } - }) - .collect::<Vec<_>>() - }) - .unwrap_or_default(); - Some(GitIgnoreTree::new( - Arc::new(deno_runtime::deno_fs::RealFs), - include_paths, - )) - } else { - None - }; - let mut target_files = Vec::new(); - let mut visited_paths = HashSet::new(); - let file_patterns_by_base = file_patterns.split_by_base(); - for file_patterns in file_patterns_by_base { - let file = normalize_path(&file_patterns.base); - // use an iterator in order to minimize the number of file system operations - let mut iterator = WalkDir::new(&file) - .follow_links(false) // the default, but be explicit - .into_iter(); - loop { - let e = match iterator.next() { - None => break, - Some(Err(_)) => continue, - Some(Ok(entry)) => entry, - }; - let file_type = e.file_type(); - let is_dir = file_type.is_dir(); - let path = e.path().to_path_buf(); - let maybe_gitignore = - maybe_git_ignores.as_mut().and_then(|git_ignores| { - if is_dir { - git_ignores.get_resolved_git_ignore_for_dir(&path) - } else { - git_ignores.get_resolved_git_ignore_for_file(&path) - } - }); - if !is_pattern_matched( - maybe_gitignore.as_deref(), - &path, - is_dir, - &file_patterns, - ) { - if is_dir { - iterator.skip_current_dir(); - } - } else if is_dir { - // allow the user to opt out of ignoring by explicitly specifying the dir - let opt_out_ignore = file == path; - let should_ignore_dir = !opt_out_ignore && self.is_ignored_dir(&path); - if should_ignore_dir || !visited_paths.insert(path.clone()) { - iterator.skip_current_dir(); - } - } else if (self.file_filter)(WalkEntry { - path: &path, - file_type: &file_type, - patterns: &file_patterns, - }) && visited_paths.insert(path.clone()) - { - target_files.push(path); - } - } - } - Ok(target_files) - } - - fn is_ignored_dir(&self, path: &Path) -> bool { - path - .file_name() - .map(|dir_name| { - let dir_name = dir_name.to_string_lossy().to_lowercase(); - let is_ignored_file = match dir_name.as_str() { - "node_modules" => self.ignore_node_modules, - ".git" => self.ignore_git_folder, - _ => false, - }; - is_ignored_file - }) - .unwrap_or(false) - || self.is_vendor_folder(path) - } - - fn is_vendor_folder(&self, path: &Path) -> bool { - self - .vendor_folder - .as_ref() - .map(|vendor_folder| path == *vendor_folder) - .unwrap_or(false) - } -} - /// Collects module specifiers that satisfy the given predicate as a file path, by recursively walking `include`. /// Specifiers that start with http and https are left intact. /// Note: This ignores all .git and node_modules folders. @@ -501,7 +312,7 @@ pub fn collect_specifiers( .ignore_git_folder() .ignore_node_modules() .set_vendor_folder(vendor_folder) - .collect_file_patterns(files)?; + .collect_file_patterns(&deno_config::fs::RealDenoConfigFs, files)?; let mut collected_files_as_urls = collected_files .iter() .map(|f| specifier_from_file_path(f).unwrap()) @@ -954,150 +765,6 @@ mod tests { } #[test] - fn test_collect_files() { - fn create_files(dir_path: &PathRef, files: &[&str]) { - dir_path.create_dir_all(); - for f in files { - dir_path.join(f).write(""); - } - } - - // dir.ts - // ├── a.ts - // ├── b.js - // ├── child - // | ├── git - // | | └── git.js - // | ├── node_modules - // | | └── node_modules.js - // | ├── vendor - // | | └── vendor.js - // │ ├── e.mjs - // │ ├── f.mjsx - // │ ├── .foo.TS - // │ └── README.md - // ├── c.tsx - // ├── d.jsx - // └── ignore - // ├── g.d.ts - // └── .gitignore - - let t = TempDir::new(); - - let root_dir_path = t.path().join("dir.ts"); - let root_dir_files = ["a.ts", "b.js", "c.tsx", "d.jsx"]; - create_files(&root_dir_path, &root_dir_files); - - let child_dir_path = root_dir_path.join("child"); - let child_dir_files = ["e.mjs", "f.mjsx", ".foo.TS", "README.md"]; - create_files(&child_dir_path, &child_dir_files); - - t.create_dir_all("dir.ts/child/node_modules"); - t.write("dir.ts/child/node_modules/node_modules.js", ""); - t.create_dir_all("dir.ts/child/.git"); - t.write("dir.ts/child/.git/git.js", ""); - t.create_dir_all("dir.ts/child/vendor"); - t.write("dir.ts/child/vendor/vendor.js", ""); - - let ignore_dir_path = root_dir_path.join("ignore"); - let ignore_dir_files = ["g.d.ts", ".gitignore"]; - create_files(&ignore_dir_path, &ignore_dir_files); - - let file_patterns = FilePatterns { - base: root_dir_path.to_path_buf(), - include: None, - exclude: PathOrPatternSet::new(vec![PathOrPattern::Path( - ignore_dir_path.to_path_buf(), - )]), - }; - let file_collector = FileCollector::new(|e| { - // exclude dotfiles - e.path - .file_name() - .and_then(|f| f.to_str()) - .map(|f| !f.starts_with('.')) - .unwrap_or(false) - }); - - let result = file_collector - .collect_file_patterns(file_patterns.clone()) - .unwrap(); - let expected = [ - "README.md", - "a.ts", - "b.js", - "c.tsx", - "d.jsx", - "e.mjs", - "f.mjsx", - "git.js", - "node_modules.js", - "vendor.js", - ]; - let mut file_names = result - .into_iter() - .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) - .collect::<Vec<_>>(); - file_names.sort(); - assert_eq!(file_names, expected); - - // test ignoring the .git and node_modules folder - let file_collector = file_collector - .ignore_git_folder() - .ignore_node_modules() - .set_vendor_folder(Some(child_dir_path.join("vendor").to_path_buf())); - let result = file_collector - .collect_file_patterns(file_patterns.clone()) - .unwrap(); - let expected = [ - "README.md", - "a.ts", - "b.js", - "c.tsx", - "d.jsx", - "e.mjs", - "f.mjsx", - ]; - let mut file_names = result - .into_iter() - .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) - .collect::<Vec<_>>(); - file_names.sort(); - assert_eq!(file_names, expected); - - // test opting out of ignoring by specifying the dir - let file_patterns = FilePatterns { - base: root_dir_path.to_path_buf(), - include: Some(PathOrPatternSet::new(vec![ - PathOrPattern::Path(root_dir_path.to_path_buf()), - PathOrPattern::Path( - root_dir_path.to_path_buf().join("child/node_modules/"), - ), - ])), - exclude: PathOrPatternSet::new(vec![PathOrPattern::Path( - ignore_dir_path.to_path_buf(), - )]), - }; - let result = file_collector.collect_file_patterns(file_patterns).unwrap(); - let expected = [ - "README.md", - "a.ts", - "b.js", - "c.tsx", - "d.jsx", - "e.mjs", - "f.mjsx", - "node_modules.js", - ]; - let mut file_names = result - .into_iter() - .map(|r| r.file_name().unwrap().to_string_lossy().to_string()) - .collect::<Vec<_>>(); - file_names.sort(); - assert_eq!(file_names, expected); - } - - #[test] fn test_collect_specifiers() { fn create_files(dir_path: &PathRef, files: &[&str]) { dir_path.create_dir_all(); diff --git a/cli/util/gitignore.rs b/cli/util/gitignore.rs deleted file mode 100644 index 4538e0912..000000000 --- a/cli/util/gitignore.rs +++ /dev/null @@ -1,178 +0,0 @@ -// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. - -use std::collections::HashMap; -use std::path::Path; -use std::path::PathBuf; -use std::rc::Rc; -use std::sync::Arc; - -/// Resolved gitignore for a directory. -pub struct DirGitIgnores { - current: Option<Rc<ignore::gitignore::Gitignore>>, - parent: Option<Rc<DirGitIgnores>>, -} - -impl DirGitIgnores { - pub fn is_ignored(&self, path: &Path, is_dir: bool) -> bool { - let mut is_ignored = false; - if let Some(parent) = &self.parent { - is_ignored = parent.is_ignored(path, is_dir); - } - if let Some(current) = &self.current { - match current.matched(path, is_dir) { - ignore::Match::None => {} - ignore::Match::Ignore(_) => { - is_ignored = true; - } - ignore::Match::Whitelist(_) => { - is_ignored = false; - } - } - } - is_ignored - } -} - -/// Resolves gitignores in a directory tree taking into account -/// ancestor gitignores that may be found in a directory. -pub struct GitIgnoreTree { - fs: Arc<dyn deno_runtime::deno_fs::FileSystem>, - ignores: HashMap<PathBuf, Option<Rc<DirGitIgnores>>>, - include_paths: Vec<PathBuf>, -} - -impl GitIgnoreTree { - pub fn new( - fs: Arc<dyn deno_runtime::deno_fs::FileSystem>, - // paths that should override what's in the gitignore - include_paths: Vec<PathBuf>, - ) -> Self { - Self { - fs, - ignores: Default::default(), - include_paths, - } - } - - pub fn get_resolved_git_ignore_for_dir( - &mut self, - dir_path: &Path, - ) -> Option<Rc<DirGitIgnores>> { - // for directories, provide itself in order to tell - // if it should stop searching for gitignores because - // maybe this dir_path is a .git directory - let parent = dir_path.parent()?; - self.get_resolved_git_ignore_inner(parent, Some(dir_path)) - } - - pub fn get_resolved_git_ignore_for_file( - &mut self, - file_path: &Path, - ) -> Option<Rc<DirGitIgnores>> { - let dir_path = file_path.parent()?; - self.get_resolved_git_ignore_inner(dir_path, None) - } - - fn get_resolved_git_ignore_inner( - &mut self, - dir_path: &Path, - maybe_parent: Option<&Path>, - ) -> Option<Rc<DirGitIgnores>> { - let maybe_resolved = self.ignores.get(dir_path).cloned(); - if let Some(resolved) = maybe_resolved { - resolved - } else { - let resolved = self.resolve_gitignore_in_dir(dir_path, maybe_parent); - self.ignores.insert(dir_path.to_owned(), resolved.clone()); - resolved - } - } - - fn resolve_gitignore_in_dir( - &mut self, - dir_path: &Path, - maybe_parent: Option<&Path>, - ) -> Option<Rc<DirGitIgnores>> { - if let Some(parent) = maybe_parent { - // stop searching if the parent dir had a .git directory in it - if self.fs.exists_sync(&parent.join(".git")) { - return None; - } - } - - let parent = dir_path.parent().and_then(|parent| { - self.get_resolved_git_ignore_inner(parent, Some(dir_path)) - }); - let current = self - .fs - .read_text_file_lossy_sync(&dir_path.join(".gitignore"), None) - .ok() - .and_then(|text| { - let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path); - for line in text.lines() { - builder.add_line(None, line).ok()?; - } - // override the gitignore contents to include these paths - for path in &self.include_paths { - if let Ok(suffix) = path.strip_prefix(dir_path) { - let suffix = suffix.to_string_lossy().replace('\\', "/"); - let _ignore = builder.add_line(None, &format!("!/{}", suffix)); - if !suffix.ends_with('/') { - let _ignore = builder.add_line(None, &format!("!/{}/", suffix)); - } - } - } - let gitignore = builder.build().ok()?; - Some(Rc::new(gitignore)) - }); - if parent.is_none() && current.is_none() { - None - } else { - Some(Rc::new(DirGitIgnores { current, parent })) - } - } -} - -#[cfg(test)] -mod test { - use deno_runtime::deno_fs::InMemoryFs; - - use super::*; - - #[test] - fn git_ignore_tree() { - let fs = InMemoryFs::default(); - fs.setup_text_files(vec![ - ("/.gitignore".into(), "file.txt".into()), - ("/sub_dir/.gitignore".into(), "data.txt".into()), - ( - "/sub_dir/sub_dir/.gitignore".into(), - "!file.txt\nignore.txt".into(), - ), - ]); - let mut ignore_tree = GitIgnoreTree::new(Arc::new(fs), Vec::new()); - let mut run_test = |path: &str, expected: bool| { - let path = PathBuf::from(path); - let gitignore = - ignore_tree.get_resolved_git_ignore_for_file(&path).unwrap(); - assert_eq!( - gitignore.is_ignored(&path, /* is_dir */ false), - expected, - "Path: {}", - path.display() - ); - }; - run_test("/file.txt", true); - run_test("/other.txt", false); - run_test("/data.txt", false); - run_test("/sub_dir/file.txt", true); - run_test("/sub_dir/other.txt", false); - run_test("/sub_dir/data.txt", true); - run_test("/sub_dir/sub_dir/file.txt", false); // unignored up here - run_test("/sub_dir/sub_dir/sub_dir/file.txt", false); - run_test("/sub_dir/sub_dir/sub_dir/ignore.txt", true); - run_test("/sub_dir/sub_dir/ignore.txt", true); - run_test("/sub_dir/ignore.txt", false); - run_test("/ignore.txt", false); - } -} diff --git a/cli/util/mod.rs b/cli/util/mod.rs index 69cdc77c3..2b6583fbc 100644 --- a/cli/util/mod.rs +++ b/cli/util/mod.rs @@ -9,7 +9,6 @@ pub mod display; pub mod draw_thread; pub mod file_watcher; pub mod fs; -pub mod gitignore; pub mod logger; pub mod path; pub mod progress_bar; diff --git a/cli/util/sync/atomic_flag.rs b/cli/util/sync/atomic_flag.rs deleted file mode 100644 index 75396dcf4..000000000 --- a/cli/util/sync/atomic_flag.rs +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. - -use std::sync::atomic::AtomicBool; -use std::sync::atomic::Ordering; - -/// Simplifies the use of an atomic boolean as a flag. -#[derive(Debug, Default)] -pub struct AtomicFlag(AtomicBool); - -impl AtomicFlag { - /// Raises the flag returning if the raise was successful. - pub fn raise(&self) -> bool { - !self.0.swap(true, Ordering::SeqCst) - } - - /// Gets if the flag is raised. - pub fn is_raised(&self) -> bool { - self.0.load(Ordering::SeqCst) - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn atomic_flag_raises() { - let flag = AtomicFlag::default(); - assert!(!flag.is_raised()); // false by default - assert!(flag.raise()); - assert!(flag.is_raised()); - assert!(!flag.raise()); - assert!(flag.is_raised()); - } -} diff --git a/cli/util/sync/mod.rs b/cli/util/sync/mod.rs index 28aab7f47..f58437503 100644 --- a/cli/util/sync/mod.rs +++ b/cli/util/sync/mod.rs @@ -1,14 +1,14 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. mod async_flag; -mod atomic_flag; mod sync_read_async_write_lock; mod task_queue; mod value_creator; pub use async_flag::AsyncFlag; -pub use atomic_flag::AtomicFlag; pub use sync_read_async_write_lock::SyncReadAsyncWriteLock; pub use task_queue::TaskQueue; pub use task_queue::TaskQueuePermit; pub use value_creator::MultiRuntimeAsyncValueCreator; +// todo(dsherret): this being in the unsync module is slightly confusing, but it's Sync +pub use deno_core::unsync::AtomicFlag; |