summaryrefslogtreecommitdiff
path: root/cli/util/glob.rs
diff options
context:
space:
mode:
Diffstat (limited to 'cli/util/glob.rs')
-rw-r--r--cli/util/glob.rs439
1 files changed, 397 insertions, 42 deletions
diff --git a/cli/util/glob.rs b/cli/util/glob.rs
index f0cabc2ec..7bd600167 100644
--- a/cli/util/glob.rs
+++ b/cli/util/glob.rs
@@ -5,30 +5,238 @@ use std::path::PathBuf;
use deno_core::anyhow::Context;
use deno_core::error::AnyError;
+use deno_core::normalize_path;
+use deno_core::url::Url;
+use indexmap::IndexMap;
-pub fn expand_globs(paths: Vec<PathBuf>) -> Result<Vec<PathBuf>, AnyError> {
- let mut new_paths = vec![];
- for path in paths {
- let path_str = path.to_string_lossy();
- if is_glob_pattern(&path_str) {
- let globbed_paths = glob(&path_str)?;
+use super::path::specifier_to_file_path;
+
+#[derive(Clone, Default, Debug, Eq, PartialEq)]
+pub struct FilePatterns {
+ pub include: Option<PathOrPatternSet>,
+ pub exclude: PathOrPatternSet,
+}
+
+impl FilePatterns {
+ pub fn matches_specifier(&self, specifier: &Url) -> bool {
+ let path = match specifier_to_file_path(specifier) {
+ Ok(path) => path,
+ Err(_) => return true,
+ };
+ self.matches_path(&path)
+ }
- for globbed_path_result in globbed_paths {
- new_paths.push(globbed_path_result?);
+ pub fn matches_path(&self, path: &Path) -> bool {
+ // Skip files in the exclude list.
+ if self.exclude.matches_path(path) {
+ return false;
+ }
+
+ // Ignore files not in the include list if it's present.
+ self
+ .include
+ .as_ref()
+ .map(|m| m.matches_path(path))
+ .unwrap_or(true)
+ }
+
+ /// Creates a collection of `FilePatterns` by base where the containing patterns
+ /// are only the ones applicable to the base.
+ ///
+ /// The order these are returned in is the order that the directory traversal
+ /// should occur in.
+ pub fn split_by_base(&self) -> Vec<(PathBuf, Self)> {
+ let Some(include) = &self.include else {
+ return Vec::new();
+ };
+
+ let mut include_paths = Vec::new();
+ let mut include_patterns = Vec::new();
+ for path_or_pattern in &include.0 {
+ match path_or_pattern {
+ PathOrPattern::Path(path) => include_paths.push((path.is_file(), path)),
+ PathOrPattern::Pattern(pattern) => include_patterns.push(pattern),
+ }
+ }
+ let include_patterns_by_base_path = include_patterns.into_iter().fold(
+ IndexMap::new(),
+ |mut map: IndexMap<_, Vec<_>>, p| {
+ map.entry(p.base_path()).or_default().push(p);
+ map
+ },
+ );
+ let exclude_by_base_path = self
+ .exclude
+ .0
+ .iter()
+ .map(|s| (s.base_path(), s))
+ .collect::<Vec<_>>();
+ let get_applicable_excludes =
+ |is_file_path: bool, base_path: &PathBuf| -> Vec<PathOrPattern> {
+ exclude_by_base_path
+ .iter()
+ .filter_map(|(exclude_base_path, exclude)| {
+ match exclude {
+ PathOrPattern::Path(exclude_path) => {
+ // For explicitly specified files, ignore when the exclude path starts
+ // with it. Regardless, include excludes that are on a sub path of the dir.
+ if is_file_path && base_path.starts_with(exclude_path)
+ || exclude_path.starts_with(base_path)
+ {
+ Some((*exclude).clone())
+ } else {
+ None
+ }
+ }
+ PathOrPattern::Pattern(_) => {
+ // include globs that's are sub paths or a parent path
+ if exclude_base_path.starts_with(base_path)
+ || base_path.starts_with(exclude_base_path)
+ {
+ Some((*exclude).clone())
+ } else {
+ None
+ }
+ }
+ }
+ })
+ .collect::<Vec<_>>()
+ };
+
+ let mut result = Vec::with_capacity(
+ include_paths.len() + include_patterns_by_base_path.len(),
+ );
+ for (is_file, path) in include_paths {
+ let applicable_excludes = get_applicable_excludes(is_file, path);
+ result.push((
+ path.clone(),
+ Self {
+ include: Some(PathOrPatternSet::new(vec![PathOrPattern::Path(
+ path.clone(),
+ )])),
+ exclude: PathOrPatternSet::new(applicable_excludes),
+ },
+ ));
+ }
+
+ // todo(dsherret): This could be further optimized by not including
+ // patterns that will only ever match another base.
+ for base_path in include_patterns_by_base_path.keys() {
+ let applicable_excludes = get_applicable_excludes(false, base_path);
+ let mut applicable_includes = Vec::new();
+ // get all patterns that apply to the current or ancestor directories
+ for path in base_path.ancestors() {
+ if let Some(patterns) = include_patterns_by_base_path.get(path) {
+ applicable_includes.extend(
+ patterns
+ .iter()
+ .map(|p| PathOrPattern::Pattern((*p).clone())),
+ );
+ }
}
- } else {
- new_paths.push(path);
+ result.push((
+ base_path.clone(),
+ Self {
+ include: Some(PathOrPatternSet::new(applicable_includes)),
+ exclude: PathOrPatternSet::new(applicable_excludes),
+ },
+ ));
}
+
+ // Sort by the longest base path first. This ensures that we visit opted into
+ // nested directories first before visiting the parent directory. The directory
+ // traverser will handle not going into directories it's already been in.
+ result.sort_by(|a, b| b.0.as_os_str().len().cmp(&a.0.as_os_str().len()));
+
+ result
+ }
+}
+
+#[derive(Clone, Default, Debug, Eq, PartialEq)]
+pub struct PathOrPatternSet(Vec<PathOrPattern>);
+
+impl PathOrPatternSet {
+ pub fn new(elements: Vec<PathOrPattern>) -> Self {
+ Self(elements)
+ }
+
+ pub fn from_absolute_paths(path: Vec<PathBuf>) -> Result<Self, AnyError> {
+ Ok(Self(
+ path
+ .into_iter()
+ .map(PathOrPattern::new)
+ .collect::<Result<Vec<_>, _>>()?,
+ ))
+ }
+
+ pub fn inner(&self) -> &Vec<PathOrPattern> {
+ &self.0
}
- Ok(new_paths)
+ pub fn into_path_or_patterns(self) -> Vec<PathOrPattern> {
+ self.0
+ }
+
+ pub fn matches_path(&self, path: &Path) -> bool {
+ self.0.iter().any(|p| p.matches_path(path))
+ }
+
+ pub fn base_paths(&self) -> Vec<PathBuf> {
+ let mut result = Vec::with_capacity(self.0.len());
+ for element in &self.0 {
+ match element {
+ PathOrPattern::Path(path) => {
+ result.push(path.to_path_buf());
+ }
+ PathOrPattern::Pattern(pattern) => {
+ result.push(pattern.base_path());
+ }
+ }
+ }
+ result
+ }
}
-pub fn glob(pattern: &str) -> Result<glob::Paths, AnyError> {
- glob::glob_with(&escape_brackets(pattern), match_options())
- .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum PathOrPattern {
+ Path(PathBuf),
+ Pattern(GlobPattern),
}
+impl PathOrPattern {
+ pub fn new(path: PathBuf) -> Result<Self, AnyError> {
+ let path_str = path.to_string_lossy();
+ // todo(dsherret): don't store URLs in PathBufs
+ if path_str.starts_with("http:")
+ || path_str.starts_with("https:")
+ || path_str.starts_with("file:")
+ {
+ return Ok(Self::Path(path));
+ }
+
+ GlobPattern::new_if_pattern(&path_str).map(|maybe_pattern| {
+ maybe_pattern
+ .map(PathOrPattern::Pattern)
+ .unwrap_or_else(|| PathOrPattern::Path(normalize_path(path)))
+ })
+ }
+
+ pub fn matches_path(&self, path: &Path) -> bool {
+ match self {
+ PathOrPattern::Path(p) => path.starts_with(p),
+ PathOrPattern::Pattern(p) => p.matches_path(path),
+ }
+ }
+
+ pub fn base_path(&self) -> PathBuf {
+ match self {
+ PathOrPattern::Path(p) => p.clone(),
+ PathOrPattern::Pattern(p) => p.base_path(),
+ }
+ }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq)]
pub struct GlobPattern(glob::Pattern);
impl GlobPattern {
@@ -40,35 +248,38 @@ impl GlobPattern {
}
pub fn new(pattern: &str) -> Result<Self, AnyError> {
- let pattern = glob::Pattern::new(pattern)
- .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))?;
+ let pattern =
+ glob::Pattern::new(&escape_brackets(pattern).replace('\\', "/"))
+ .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))?;
Ok(Self(pattern))
}
pub fn matches_path(&self, path: &Path) -> bool {
- self.0.matches_path(path)
+ self.0.matches_path_with(path, match_options())
}
-}
-
-pub struct GlobSet(Vec<GlobPattern>);
-impl GlobSet {
- pub fn new(matchers: Vec<GlobPattern>) -> Self {
- Self(matchers)
- }
-
- pub fn matches_path(&self, path: &Path) -> bool {
- for pattern in &self.0 {
- if pattern.matches_path(path) {
- return true;
- }
- }
- false
+ pub fn base_path(&self) -> PathBuf {
+ let base_path = self
+ .0
+ .as_str()
+ .split('/')
+ .take_while(|c| !has_glob_chars(c))
+ .collect::<Vec<_>>()
+ .join(std::path::MAIN_SEPARATOR_STR);
+ PathBuf::from(base_path)
}
}
pub fn is_glob_pattern(path: &str) -> bool {
- path.chars().any(|c| matches!(c, '*' | '?'))
+ !path.starts_with("http:")
+ && !path.starts_with("https:")
+ && !path.starts_with("file:")
+ && has_glob_chars(path)
+}
+
+fn has_glob_chars(pattern: &str) -> bool {
+ // we don't support [ and ]
+ pattern.chars().any(|c| matches!(c, '*' | '?'))
}
fn escape_brackets(pattern: &str) -> String {
@@ -92,17 +303,161 @@ fn match_options() -> glob::MatchOptions {
#[cfg(test)]
mod test {
+ use pretty_assertions::assert_eq;
+ use test_util::TempDir;
+
use super::*;
+ // For easier comparisons in tests.
+ #[derive(Debug, PartialEq, Eq)]
+ struct ComparableFilePatterns {
+ include: Option<Vec<String>>,
+ exclude: Vec<String>,
+ }
+
+ impl ComparableFilePatterns {
+ pub fn new(root: &Path, file_patterns: &FilePatterns) -> Self {
+ fn path_or_pattern_to_string(root: &Path, p: &PathOrPattern) -> String {
+ match p {
+ PathOrPattern::Path(p) => p
+ .strip_prefix(root)
+ .unwrap()
+ .to_string_lossy()
+ .replace('\\', "/"),
+ PathOrPattern::Pattern(p) => p
+ .0
+ .as_str()
+ .strip_prefix(&format!(
+ "{}/",
+ root.to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap()
+ .to_string(),
+ }
+ }
+
+ Self {
+ include: file_patterns.include.as_ref().map(|p| {
+ p.0
+ .iter()
+ .map(|p| path_or_pattern_to_string(root, p))
+ .collect()
+ }),
+ exclude: file_patterns
+ .exclude
+ .0
+ .iter()
+ .map(|p| path_or_pattern_to_string(root, p))
+ .collect(),
+ }
+ }
+
+ pub fn from_split(
+ root: &Path,
+ patterns_by_base: &[(PathBuf, FilePatterns)],
+ ) -> Vec<(String, ComparableFilePatterns)> {
+ patterns_by_base
+ .iter()
+ .map(|(base_path, file_patterns)| {
+ (
+ base_path
+ .strip_prefix(root)
+ .unwrap()
+ .to_string_lossy()
+ .replace('\\', "/"),
+ ComparableFilePatterns::new(root, file_patterns),
+ )
+ })
+ .collect()
+ }
+ }
+
#[test]
- pub fn glob_set_matches_path() {
- let glob_set = GlobSet::new(vec![
- GlobPattern::new("foo/bar").unwrap(),
- GlobPattern::new("foo/baz").unwrap(),
- ]);
-
- assert!(glob_set.matches_path(Path::new("foo/bar")));
- assert!(glob_set.matches_path(Path::new("foo/baz")));
- assert!(!glob_set.matches_path(Path::new("foo/qux")));
+ fn should_split_globs_by_base_dir() {
+ let temp_dir = TempDir::new();
+ let patterns = FilePatterns {
+ include: Some(PathOrPatternSet::new(vec![
+ PathOrPattern::Pattern(
+ GlobPattern::new(&format!(
+ "{}/inner/**/*.ts",
+ temp_dir.path().to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap(),
+ ),
+ PathOrPattern::Pattern(
+ GlobPattern::new(&format!(
+ "{}/inner/sub/deeper/**/*.js",
+ temp_dir.path().to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap(),
+ ),
+ PathOrPattern::Pattern(
+ GlobPattern::new(&format!(
+ "{}/other/**/*.js",
+ temp_dir.path().to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap(),
+ ),
+ PathOrPattern::Path(temp_dir.path().join("sub/file.ts").to_path_buf()),
+ ])),
+ exclude: PathOrPatternSet::new(vec![
+ PathOrPattern::Pattern(
+ GlobPattern::new(&format!(
+ "{}/inner/other/**/*.ts",
+ temp_dir.path().to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap(),
+ ),
+ PathOrPattern::Path(
+ temp_dir
+ .path()
+ .join("inner/sub/deeper/file.js")
+ .to_path_buf(),
+ ),
+ ]),
+ };
+ let split = ComparableFilePatterns::from_split(
+ temp_dir.path().as_path(),
+ &patterns.split_by_base(),
+ );
+ assert_eq!(
+ split,
+ vec![
+ (
+ "inner/sub/deeper".to_string(),
+ ComparableFilePatterns {
+ include: Some(vec![
+ "inner/sub/deeper/**/*.js".to_string(),
+ "inner/**/*.ts".to_string(),
+ ]),
+ exclude: vec!["inner/sub/deeper/file.js".to_string()],
+ }
+ ),
+ (
+ "sub/file.ts".to_string(),
+ ComparableFilePatterns {
+ include: Some(vec!["sub/file.ts".to_string()]),
+ exclude: vec![],
+ }
+ ),
+ (
+ "inner".to_string(),
+ ComparableFilePatterns {
+ include: Some(vec!["inner/**/*.ts".to_string()]),
+ exclude: vec![
+ "inner/other/**/*.ts".to_string(),
+ "inner/sub/deeper/file.js".to_string(),
+ ],
+ }
+ ),
+ (
+ "other".to_string(),
+ ComparableFilePatterns {
+ include: Some(vec!["other/**/*.js".to_string()]),
+ exclude: vec![],
+ }
+ )
+ ]
+ );
}
}