diff options
Diffstat (limited to 'cli/util/glob.rs')
-rw-r--r-- | cli/util/glob.rs | 439 |
1 files changed, 397 insertions, 42 deletions
diff --git a/cli/util/glob.rs b/cli/util/glob.rs index f0cabc2ec..7bd600167 100644 --- a/cli/util/glob.rs +++ b/cli/util/glob.rs @@ -5,30 +5,238 @@ use std::path::PathBuf; use deno_core::anyhow::Context; use deno_core::error::AnyError; +use deno_core::normalize_path; +use deno_core::url::Url; +use indexmap::IndexMap; -pub fn expand_globs(paths: Vec<PathBuf>) -> Result<Vec<PathBuf>, AnyError> { - let mut new_paths = vec![]; - for path in paths { - let path_str = path.to_string_lossy(); - if is_glob_pattern(&path_str) { - let globbed_paths = glob(&path_str)?; +use super::path::specifier_to_file_path; + +#[derive(Clone, Default, Debug, Eq, PartialEq)] +pub struct FilePatterns { + pub include: Option<PathOrPatternSet>, + pub exclude: PathOrPatternSet, +} + +impl FilePatterns { + pub fn matches_specifier(&self, specifier: &Url) -> bool { + let path = match specifier_to_file_path(specifier) { + Ok(path) => path, + Err(_) => return true, + }; + self.matches_path(&path) + } - for globbed_path_result in globbed_paths { - new_paths.push(globbed_path_result?); + pub fn matches_path(&self, path: &Path) -> bool { + // Skip files in the exclude list. + if self.exclude.matches_path(path) { + return false; + } + + // Ignore files not in the include list if it's present. + self + .include + .as_ref() + .map(|m| m.matches_path(path)) + .unwrap_or(true) + } + + /// Creates a collection of `FilePatterns` by base where the containing patterns + /// are only the ones applicable to the base. + /// + /// The order these are returned in is the order that the directory traversal + /// should occur in. + pub fn split_by_base(&self) -> Vec<(PathBuf, Self)> { + let Some(include) = &self.include else { + return Vec::new(); + }; + + let mut include_paths = Vec::new(); + let mut include_patterns = Vec::new(); + for path_or_pattern in &include.0 { + match path_or_pattern { + PathOrPattern::Path(path) => include_paths.push((path.is_file(), path)), + PathOrPattern::Pattern(pattern) => include_patterns.push(pattern), + } + } + let include_patterns_by_base_path = include_patterns.into_iter().fold( + IndexMap::new(), + |mut map: IndexMap<_, Vec<_>>, p| { + map.entry(p.base_path()).or_default().push(p); + map + }, + ); + let exclude_by_base_path = self + .exclude + .0 + .iter() + .map(|s| (s.base_path(), s)) + .collect::<Vec<_>>(); + let get_applicable_excludes = + |is_file_path: bool, base_path: &PathBuf| -> Vec<PathOrPattern> { + exclude_by_base_path + .iter() + .filter_map(|(exclude_base_path, exclude)| { + match exclude { + PathOrPattern::Path(exclude_path) => { + // For explicitly specified files, ignore when the exclude path starts + // with it. Regardless, include excludes that are on a sub path of the dir. + if is_file_path && base_path.starts_with(exclude_path) + || exclude_path.starts_with(base_path) + { + Some((*exclude).clone()) + } else { + None + } + } + PathOrPattern::Pattern(_) => { + // include globs that's are sub paths or a parent path + if exclude_base_path.starts_with(base_path) + || base_path.starts_with(exclude_base_path) + { + Some((*exclude).clone()) + } else { + None + } + } + } + }) + .collect::<Vec<_>>() + }; + + let mut result = Vec::with_capacity( + include_paths.len() + include_patterns_by_base_path.len(), + ); + for (is_file, path) in include_paths { + let applicable_excludes = get_applicable_excludes(is_file, path); + result.push(( + path.clone(), + Self { + include: Some(PathOrPatternSet::new(vec![PathOrPattern::Path( + path.clone(), + )])), + exclude: PathOrPatternSet::new(applicable_excludes), + }, + )); + } + + // todo(dsherret): This could be further optimized by not including + // patterns that will only ever match another base. + for base_path in include_patterns_by_base_path.keys() { + let applicable_excludes = get_applicable_excludes(false, base_path); + let mut applicable_includes = Vec::new(); + // get all patterns that apply to the current or ancestor directories + for path in base_path.ancestors() { + if let Some(patterns) = include_patterns_by_base_path.get(path) { + applicable_includes.extend( + patterns + .iter() + .map(|p| PathOrPattern::Pattern((*p).clone())), + ); + } } - } else { - new_paths.push(path); + result.push(( + base_path.clone(), + Self { + include: Some(PathOrPatternSet::new(applicable_includes)), + exclude: PathOrPatternSet::new(applicable_excludes), + }, + )); } + + // Sort by the longest base path first. This ensures that we visit opted into + // nested directories first before visiting the parent directory. The directory + // traverser will handle not going into directories it's already been in. + result.sort_by(|a, b| b.0.as_os_str().len().cmp(&a.0.as_os_str().len())); + + result + } +} + +#[derive(Clone, Default, Debug, Eq, PartialEq)] +pub struct PathOrPatternSet(Vec<PathOrPattern>); + +impl PathOrPatternSet { + pub fn new(elements: Vec<PathOrPattern>) -> Self { + Self(elements) + } + + pub fn from_absolute_paths(path: Vec<PathBuf>) -> Result<Self, AnyError> { + Ok(Self( + path + .into_iter() + .map(PathOrPattern::new) + .collect::<Result<Vec<_>, _>>()?, + )) + } + + pub fn inner(&self) -> &Vec<PathOrPattern> { + &self.0 } - Ok(new_paths) + pub fn into_path_or_patterns(self) -> Vec<PathOrPattern> { + self.0 + } + + pub fn matches_path(&self, path: &Path) -> bool { + self.0.iter().any(|p| p.matches_path(path)) + } + + pub fn base_paths(&self) -> Vec<PathBuf> { + let mut result = Vec::with_capacity(self.0.len()); + for element in &self.0 { + match element { + PathOrPattern::Path(path) => { + result.push(path.to_path_buf()); + } + PathOrPattern::Pattern(pattern) => { + result.push(pattern.base_path()); + } + } + } + result + } } -pub fn glob(pattern: &str) -> Result<glob::Paths, AnyError> { - glob::glob_with(&escape_brackets(pattern), match_options()) - .with_context(|| format!("Failed to expand glob: \"{}\"", pattern)) +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum PathOrPattern { + Path(PathBuf), + Pattern(GlobPattern), } +impl PathOrPattern { + pub fn new(path: PathBuf) -> Result<Self, AnyError> { + let path_str = path.to_string_lossy(); + // todo(dsherret): don't store URLs in PathBufs + if path_str.starts_with("http:") + || path_str.starts_with("https:") + || path_str.starts_with("file:") + { + return Ok(Self::Path(path)); + } + + GlobPattern::new_if_pattern(&path_str).map(|maybe_pattern| { + maybe_pattern + .map(PathOrPattern::Pattern) + .unwrap_or_else(|| PathOrPattern::Path(normalize_path(path))) + }) + } + + pub fn matches_path(&self, path: &Path) -> bool { + match self { + PathOrPattern::Path(p) => path.starts_with(p), + PathOrPattern::Pattern(p) => p.matches_path(path), + } + } + + pub fn base_path(&self) -> PathBuf { + match self { + PathOrPattern::Path(p) => p.clone(), + PathOrPattern::Pattern(p) => p.base_path(), + } + } +} + +#[derive(Debug, Clone, Eq, PartialEq)] pub struct GlobPattern(glob::Pattern); impl GlobPattern { @@ -40,35 +248,38 @@ impl GlobPattern { } pub fn new(pattern: &str) -> Result<Self, AnyError> { - let pattern = glob::Pattern::new(pattern) - .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))?; + let pattern = + glob::Pattern::new(&escape_brackets(pattern).replace('\\', "/")) + .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))?; Ok(Self(pattern)) } pub fn matches_path(&self, path: &Path) -> bool { - self.0.matches_path(path) + self.0.matches_path_with(path, match_options()) } -} - -pub struct GlobSet(Vec<GlobPattern>); -impl GlobSet { - pub fn new(matchers: Vec<GlobPattern>) -> Self { - Self(matchers) - } - - pub fn matches_path(&self, path: &Path) -> bool { - for pattern in &self.0 { - if pattern.matches_path(path) { - return true; - } - } - false + pub fn base_path(&self) -> PathBuf { + let base_path = self + .0 + .as_str() + .split('/') + .take_while(|c| !has_glob_chars(c)) + .collect::<Vec<_>>() + .join(std::path::MAIN_SEPARATOR_STR); + PathBuf::from(base_path) } } pub fn is_glob_pattern(path: &str) -> bool { - path.chars().any(|c| matches!(c, '*' | '?')) + !path.starts_with("http:") + && !path.starts_with("https:") + && !path.starts_with("file:") + && has_glob_chars(path) +} + +fn has_glob_chars(pattern: &str) -> bool { + // we don't support [ and ] + pattern.chars().any(|c| matches!(c, '*' | '?')) } fn escape_brackets(pattern: &str) -> String { @@ -92,17 +303,161 @@ fn match_options() -> glob::MatchOptions { #[cfg(test)] mod test { + use pretty_assertions::assert_eq; + use test_util::TempDir; + use super::*; + // For easier comparisons in tests. + #[derive(Debug, PartialEq, Eq)] + struct ComparableFilePatterns { + include: Option<Vec<String>>, + exclude: Vec<String>, + } + + impl ComparableFilePatterns { + pub fn new(root: &Path, file_patterns: &FilePatterns) -> Self { + fn path_or_pattern_to_string(root: &Path, p: &PathOrPattern) -> String { + match p { + PathOrPattern::Path(p) => p + .strip_prefix(root) + .unwrap() + .to_string_lossy() + .replace('\\', "/"), + PathOrPattern::Pattern(p) => p + .0 + .as_str() + .strip_prefix(&format!( + "{}/", + root.to_string_lossy().replace('\\', "/") + )) + .unwrap() + .to_string(), + } + } + + Self { + include: file_patterns.include.as_ref().map(|p| { + p.0 + .iter() + .map(|p| path_or_pattern_to_string(root, p)) + .collect() + }), + exclude: file_patterns + .exclude + .0 + .iter() + .map(|p| path_or_pattern_to_string(root, p)) + .collect(), + } + } + + pub fn from_split( + root: &Path, + patterns_by_base: &[(PathBuf, FilePatterns)], + ) -> Vec<(String, ComparableFilePatterns)> { + patterns_by_base + .iter() + .map(|(base_path, file_patterns)| { + ( + base_path + .strip_prefix(root) + .unwrap() + .to_string_lossy() + .replace('\\', "/"), + ComparableFilePatterns::new(root, file_patterns), + ) + }) + .collect() + } + } + #[test] - pub fn glob_set_matches_path() { - let glob_set = GlobSet::new(vec![ - GlobPattern::new("foo/bar").unwrap(), - GlobPattern::new("foo/baz").unwrap(), - ]); - - assert!(glob_set.matches_path(Path::new("foo/bar"))); - assert!(glob_set.matches_path(Path::new("foo/baz"))); - assert!(!glob_set.matches_path(Path::new("foo/qux"))); + fn should_split_globs_by_base_dir() { + let temp_dir = TempDir::new(); + let patterns = FilePatterns { + include: Some(PathOrPatternSet::new(vec![ + PathOrPattern::Pattern( + GlobPattern::new(&format!( + "{}/inner/**/*.ts", + temp_dir.path().to_string_lossy().replace('\\', "/") + )) + .unwrap(), + ), + PathOrPattern::Pattern( + GlobPattern::new(&format!( + "{}/inner/sub/deeper/**/*.js", + temp_dir.path().to_string_lossy().replace('\\', "/") + )) + .unwrap(), + ), + PathOrPattern::Pattern( + GlobPattern::new(&format!( + "{}/other/**/*.js", + temp_dir.path().to_string_lossy().replace('\\', "/") + )) + .unwrap(), + ), + PathOrPattern::Path(temp_dir.path().join("sub/file.ts").to_path_buf()), + ])), + exclude: PathOrPatternSet::new(vec![ + PathOrPattern::Pattern( + GlobPattern::new(&format!( + "{}/inner/other/**/*.ts", + temp_dir.path().to_string_lossy().replace('\\', "/") + )) + .unwrap(), + ), + PathOrPattern::Path( + temp_dir + .path() + .join("inner/sub/deeper/file.js") + .to_path_buf(), + ), + ]), + }; + let split = ComparableFilePatterns::from_split( + temp_dir.path().as_path(), + &patterns.split_by_base(), + ); + assert_eq!( + split, + vec![ + ( + "inner/sub/deeper".to_string(), + ComparableFilePatterns { + include: Some(vec![ + "inner/sub/deeper/**/*.js".to_string(), + "inner/**/*.ts".to_string(), + ]), + exclude: vec!["inner/sub/deeper/file.js".to_string()], + } + ), + ( + "sub/file.ts".to_string(), + ComparableFilePatterns { + include: Some(vec!["sub/file.ts".to_string()]), + exclude: vec![], + } + ), + ( + "inner".to_string(), + ComparableFilePatterns { + include: Some(vec!["inner/**/*.ts".to_string()]), + exclude: vec![ + "inner/other/**/*.ts".to_string(), + "inner/sub/deeper/file.js".to_string(), + ], + } + ), + ( + "other".to_string(), + ComparableFilePatterns { + include: Some(vec!["other/**/*.js".to_string()]), + exclude: vec![], + } + ) + ] + ); } } |