summary | refs | log | tree | commit | diff
path: root/cli/util
diff options
context:
space:
mode:
author: David Sherret <dsherret@users.noreply.github.com>, 2024-01-08 12:18:42 -0500
committer: GitHub <noreply@github.com>, 2024-01-08 17:18:42 +0000
commit: e212e1fc35ddae63f457f0f2a2e95154e008941f (patch)
tree: 6c1f553fbc529bfcab6413049af8f32ed31d1dfd /cli/util
parent: ee45d5bf8f2e1826b2a106b030abe891cfc0b37c (diff)
perf: skip expanding exclude globs (#21817)
We were calling `expand_globs` on our excludes, which is very expensive and unnecessary because we can pattern match while traversing instead.

This change:
1. No longer expands "exclude" globs. Instead, it pattern matches against them while walking the directory.
2. Splits up the "include" patterns into base paths and the file patterns applicable to each base path. This causes less pattern matching to occur because we only match against patterns that might apply, not ones in completely unrelated directories.
Diffstat (limited to 'cli/util')
-rw-r--r-- cli/util/fs.rs 358
-rw-r--r-- cli/util/glob.rs 439
2 files changed, 598 insertions, 199 deletions
diff --git a/cli/util/fs.rs b/cli/util/fs.rs
index f9fe9424f..86b17754b 100644
--- a/cli/util/fs.rs
+++ b/cli/util/fs.rs
@@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+use deno_core::anyhow::anyhow;
use deno_core::anyhow::Context;
use deno_core::error::AnyError;
pub use deno_core::normalize_path;
@@ -8,7 +9,7 @@ use deno_core::ModuleSpecifier;
use deno_runtime::deno_crypto::rand;
use deno_runtime::deno_fs::FileSystem;
use deno_runtime::deno_node::PathClean;
-use std::borrow::Cow;
+use std::collections::HashSet;
use std::env::current_dir;
use std::fmt::Write as FmtWrite;
use std::fs::OpenOptions;
@@ -21,11 +22,13 @@ use std::sync::Arc;
use std::time::Duration;
use walkdir::WalkDir;
-use crate::args::FilesConfig;
use crate::util::progress_bar::ProgressBar;
use crate::util::progress_bar::ProgressBarStyle;
use crate::util::progress_bar::ProgressMessagePrompt;
+use super::glob::FilePatterns;
+use super::glob::PathOrPattern;
+use super::glob::PathOrPatternSet;
use super::path::specifier_to_file_path;
/// Writes the file to the file system at a temporary path, then
@@ -244,18 +247,16 @@ pub fn resolve_from_cwd(path: &Path) -> Result<PathBuf, AnyError> {
/// Collects file paths that satisfy the given predicate, by recursively walking `files`.
/// If the walker visits a path that is listed in `ignore`, it skips descending into the directory.
-pub struct FileCollector<TFilter: Fn(&Path) -> bool> {
- canonicalized_ignore: Vec<PathBuf>,
+pub struct FileCollector<TFilter: Fn(&Path, &FilePatterns) -> bool> {
file_filter: TFilter,
ignore_git_folder: bool,
ignore_node_modules: bool,
ignore_vendor_folder: bool,
}
-impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
+impl<TFilter: Fn(&Path, &FilePatterns) -> bool> FileCollector<TFilter> {
pub fn new(file_filter: TFilter) -> Self {
Self {
- canonicalized_ignore: Default::default(),
file_filter,
ignore_git_folder: false,
ignore_node_modules: false,
@@ -263,14 +264,6 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
}
}
- pub fn add_ignore_paths(mut self, paths: &[PathBuf]) -> Self {
- // retain only the paths which exist and ignore the rest
- self
- .canonicalized_ignore
- .extend(paths.iter().filter_map(|i| canonicalize_path(i).ok()));
- self
- }
-
pub fn ignore_node_modules(mut self) -> Self {
self.ignore_node_modules = true;
self
@@ -286,58 +279,62 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
self
}
- pub fn collect_files(
+ pub fn collect_file_patterns(
&self,
- files: Option<&[PathBuf]>,
+ file_patterns: FilePatterns,
) -> Result<Vec<PathBuf>, AnyError> {
let mut target_files = Vec::new();
- let files = if let Some(files) = files {
- Cow::Borrowed(files)
- } else {
- Cow::Owned(vec![PathBuf::from(".")])
- };
- for file in files.iter() {
- if let Ok(file) = canonicalize_path(file) {
- // use an iterator like this in order to minimize the number of file system operations
- let mut iterator = WalkDir::new(&file).into_iter();
- loop {
- let e = match iterator.next() {
- None => break,
- Some(Err(_)) => continue,
- Some(Ok(entry)) => entry,
- };
- let file_type = e.file_type();
- let is_dir = file_type.is_dir();
- if let Ok(c) = canonicalize_path(e.path()) {
- if self.canonicalized_ignore.iter().any(|i| c.starts_with(i)) {
- if is_dir {
- iterator.skip_current_dir();
- }
- } else if is_dir {
- let should_ignore_dir = c
- .file_name()
- .map(|dir_name| {
- let dir_name = dir_name.to_string_lossy().to_lowercase();
- let is_ignored_file = match dir_name.as_str() {
- "node_modules" => self.ignore_node_modules,
- "vendor" => self.ignore_vendor_folder,
- ".git" => self.ignore_git_folder,
- _ => false,
- };
- // allow the user to opt out of ignoring by explicitly specifying the dir
- file != c && is_ignored_file
- })
- .unwrap_or(false);
- if should_ignore_dir {
- iterator.skip_current_dir();
- }
- } else if (self.file_filter)(e.path()) {
- target_files.push(c);
- }
- } else if is_dir {
- // failed canonicalizing, so skip it
+ let mut visited_paths = HashSet::new();
+ let file_patterns_by_base = file_patterns.split_by_base();
+ for (base, file_patterns) in file_patterns_by_base {
+ let file = normalize_path(base);
+ // use an iterator in order to minimize the number of file system operations
+ let mut iterator = WalkDir::new(&file)
+ .follow_links(false) // the default, but be explicit
+ .into_iter();
+ loop {
+ let e = match iterator.next() {
+ None => break,
+ Some(Err(_)) => continue,
+ Some(Ok(entry)) => entry,
+ };
+ let file_type = e.file_type();
+ let is_dir = file_type.is_dir();
+ let c = e.path().to_path_buf();
+ if file_patterns.exclude.matches_path(&c)
+ || !is_dir
+ && !file_patterns
+ .include
+ .as_ref()
+ .map(|i| i.matches_path(&c))
+ .unwrap_or(true)
+ {
+ if is_dir {
iterator.skip_current_dir();
}
+ } else if is_dir {
+ let should_ignore_dir = c
+ .file_name()
+ .map(|dir_name| {
+ let dir_name = dir_name.to_string_lossy().to_lowercase();
+ let is_ignored_file = match dir_name.as_str() {
+ "node_modules" => self.ignore_node_modules,
+ "vendor" => self.ignore_vendor_folder,
+ ".git" => self.ignore_git_folder,
+ _ => false,
+ };
+ // allow the user to opt out of ignoring by explicitly specifying the dir
+ file != c && is_ignored_file
+ })
+ .unwrap_or(false)
+ || !visited_paths.insert(c.clone());
+ if should_ignore_dir {
+ iterator.skip_current_dir();
+ }
+ } else if (self.file_filter)(&c, &file_patterns)
+ && visited_paths.insert(c.clone())
+ {
+ target_files.push(c);
}
}
}
@@ -349,54 +346,68 @@ impl<TFilter: Fn(&Path) -> bool> FileCollector<TFilter> {
/// Specifiers that start with http and https are left intact.
/// Note: This ignores all .git and node_modules folders.
pub fn collect_specifiers(
- files: &FilesConfig,
- predicate: impl Fn(&Path) -> bool,
+ mut files: FilePatterns,
+ predicate: impl Fn(&Path, &FilePatterns) -> bool,
) -> Result<Vec<ModuleSpecifier>, AnyError> {
let mut prepared = vec![];
- let file_collector = FileCollector::new(predicate)
- .add_ignore_paths(&files.exclude)
- .ignore_git_folder()
- .ignore_node_modules()
- .ignore_vendor_folder();
-
- let root_path = current_dir()?;
- let include_files = if let Some(include) = &files.include {
- Cow::Borrowed(include)
- } else {
- Cow::Owned(vec![root_path.clone()])
- };
- for path in include_files.iter() {
- let path = path.to_string_lossy();
- let lowercase_path = path.to_lowercase();
- if lowercase_path.starts_with("http://")
- || lowercase_path.starts_with("https://")
- {
- let url = ModuleSpecifier::parse(&path)?;
- prepared.push(url);
- continue;
- }
- let p = if lowercase_path.starts_with("file://") {
- specifier_to_file_path(&ModuleSpecifier::parse(&path)?)?
- } else {
- root_path.join(path.as_ref())
- };
- let p = normalize_path(p);
- if p.is_dir() {
- let test_files = file_collector.collect_files(Some(&[p]))?;
- let mut test_files_as_urls = test_files
- .iter()
- .map(|f| ModuleSpecifier::from_file_path(f).unwrap())
- .collect::<Vec<ModuleSpecifier>>();
-
- test_files_as_urls.sort();
- prepared.extend(test_files_as_urls);
- } else {
- let url = ModuleSpecifier::from_file_path(p).unwrap();
- prepared.push(url);
+ // break out the remote specifiers
+ if let Some(include_mut) = &mut files.include {
+ let includes = std::mem::take(include_mut);
+ let path_or_patterns = includes.into_path_or_patterns();
+ let mut result = Vec::with_capacity(path_or_patterns.len());
+ for path_or_pattern in path_or_patterns {
+ match path_or_pattern {
+ PathOrPattern::Path(path) => {
+ // todo(dsherret): we should improve this to not store URLs in a PathBuf
+ let path_str = path.to_string_lossy();
+ let lowercase_path = path_str.to_lowercase();
+ if lowercase_path.starts_with("http://")
+ || lowercase_path.starts_with("https://")
+ {
+ // take out the url
+ let url = ModuleSpecifier::parse(&path_str)
+ .with_context(|| format!("Invalid URL '{}'", path_str))?;
+ prepared.push(url);
+ } else if lowercase_path.starts_with("file://") {
+ let url = ModuleSpecifier::parse(&path_str)
+ .with_context(|| format!("Invalid URL '{}'", path_str))?;
+ let p = specifier_to_file_path(&url)?;
+ if p.is_dir() {
+ result.push(PathOrPattern::Path(p));
+ } else {
+ prepared.push(url)
+ }
+ } else if path.is_dir() {
+ result.push(PathOrPattern::Path(path));
+ } else if !files.exclude.matches_path(&path) {
+ let url = ModuleSpecifier::from_file_path(&path)
+ .map_err(|_| anyhow!("Invalid file path '{}'", path.display()))?;
+ prepared.push(url);
+ }
+ }
+ PathOrPattern::Pattern(pattern) => {
+ // add it back
+ result.push(PathOrPattern::Pattern(pattern));
+ }
+ }
}
+ *include_mut = PathOrPatternSet::new(result);
}
+ let collected_files = FileCollector::new(predicate)
+ .ignore_git_folder()
+ .ignore_node_modules()
+ .ignore_vendor_folder()
+ .collect_file_patterns(files)?;
+ let mut collected_files_as_urls = collected_files
+ .iter()
+ .map(|f| ModuleSpecifier::from_file_path(f).unwrap())
+ .collect::<Vec<ModuleSpecifier>>();
+
+ collected_files_as_urls.sort();
+ prepared.extend(collected_files_as_urls);
+
Ok(prepared)
}
@@ -812,18 +823,29 @@ mod tests {
let ignore_dir_files = ["g.d.ts", ".gitignore"];
create_files(&ignore_dir_path, &ignore_dir_files);
- let file_collector = FileCollector::new(|path| {
+ let file_patterns = FilePatterns {
+ include: Some(
+ PathOrPatternSet::from_absolute_paths(
+ vec![root_dir_path.to_path_buf()],
+ )
+ .unwrap(),
+ ),
+ exclude: PathOrPatternSet::from_absolute_paths(vec![
+ ignore_dir_path.to_path_buf()
+ ])
+ .unwrap(),
+ };
+ let file_collector = FileCollector::new(|path, _| {
// exclude dotfiles
path
.file_name()
.and_then(|f| f.to_str())
.map(|f| !f.starts_with('.'))
.unwrap_or(false)
- })
- .add_ignore_paths(&[ignore_dir_path.to_path_buf()]);
+ });
let result = file_collector
- .collect_files(Some(&[root_dir_path.to_path_buf()]))
+ .collect_file_patterns(file_patterns.clone())
.unwrap();
let expected = [
"README.md",
@@ -850,7 +872,7 @@ mod tests {
.ignore_node_modules()
.ignore_vendor_folder();
let result = file_collector
- .collect_files(Some(&[root_dir_path.to_path_buf()]))
+ .collect_file_patterns(file_patterns.clone())
.unwrap();
let expected = [
"README.md",
@@ -869,12 +891,20 @@ mod tests {
assert_eq!(file_names, expected);
// test opting out of ignoring by specifying the dir
- let result = file_collector
- .collect_files(Some(&[
- root_dir_path.to_path_buf(),
- root_dir_path.to_path_buf().join("child/node_modules/"),
- ]))
- .unwrap();
+ let file_patterns = FilePatterns {
+ include: Some(
+ PathOrPatternSet::from_absolute_paths(vec![
+ root_dir_path.to_path_buf(),
+ root_dir_path.to_path_buf().join("child/node_modules/"),
+ ])
+ .unwrap(),
+ ),
+ exclude: PathOrPatternSet::from_absolute_paths(vec![
+ ignore_dir_path.to_path_buf()
+ ])
+ .unwrap(),
+ };
+ let result = file_collector.collect_file_patterns(file_patterns).unwrap();
let expected = [
"README.md",
"a.ts",
@@ -930,7 +960,7 @@ mod tests {
let ignore_dir_files = ["g.d.ts", ".gitignore"];
create_files(&ignore_dir_path, &ignore_dir_files);
- let predicate = |path: &Path| {
+ let predicate = |path: &Path, _: &FilePatterns| {
// exclude dotfiles
path
.file_name()
@@ -940,38 +970,46 @@ mod tests {
};
let result = collect_specifiers(
- &FilesConfig {
- include: Some(vec![
- PathBuf::from("http://localhost:8080"),
- root_dir_path.to_path_buf(),
- PathBuf::from("https://localhost:8080".to_string()),
- ]),
- exclude: vec![ignore_dir_path.to_path_buf()],
+ FilePatterns {
+ include: Some(
+ PathOrPatternSet::from_absolute_paths(vec![
+ PathBuf::from("http://localhost:8080"),
+ root_dir_path.to_path_buf(),
+ PathBuf::from("https://localhost:8080".to_string()),
+ ])
+ .unwrap(),
+ ),
+ exclude: PathOrPatternSet::from_absolute_paths(vec![
+ ignore_dir_path.to_path_buf()
+ ])
+ .unwrap(),
},
predicate,
)
.unwrap();
- let root_dir_url =
- ModuleSpecifier::from_file_path(root_dir_path.canonicalize())
- .unwrap()
- .to_string();
- let expected: Vec<ModuleSpecifier> = [
- "http://localhost:8080",
- &format!("{root_dir_url}/a.ts"),
- &format!("{root_dir_url}/b.js"),
- &format!("{root_dir_url}/c.tsx"),
- &format!("{root_dir_url}/child/README.md"),
- &format!("{root_dir_url}/child/e.mjs"),
- &format!("{root_dir_url}/child/f.mjsx"),
- &format!("{root_dir_url}/d.jsx"),
- "https://localhost:8080",
- ]
- .iter()
- .map(|f| ModuleSpecifier::parse(f).unwrap())
- .collect::<Vec<_>>();
+ let root_dir_url = ModuleSpecifier::from_file_path(&root_dir_path)
+ .unwrap()
+ .to_string();
+ let expected = vec![
+ "http://localhost:8080/".to_string(),
+ "https://localhost:8080/".to_string(),
+ format!("{root_dir_url}/a.ts"),
+ format!("{root_dir_url}/b.js"),
+ format!("{root_dir_url}/c.tsx"),
+ format!("{root_dir_url}/child/README.md"),
+ format!("{root_dir_url}/child/e.mjs"),
+ format!("{root_dir_url}/child/f.mjsx"),
+ format!("{root_dir_url}/d.jsx"),
+ ];
- assert_eq!(result, expected);
+ assert_eq!(
+ result
+ .into_iter()
+ .map(|s| s.to_string())
+ .collect::<Vec<_>>(),
+ expected
+ );
let scheme = if cfg!(target_os = "windows") {
"file:///"
@@ -979,28 +1017,34 @@ mod tests {
"file://"
};
let result = collect_specifiers(
- &FilesConfig {
- include: Some(vec![PathBuf::from(format!(
- "{}{}",
- scheme,
- root_dir_path.join("child").to_string().replace('\\', "/")
- ))]),
- exclude: vec![],
+ FilePatterns {
+ include: Some(
+ PathOrPatternSet::from_absolute_paths(vec![PathBuf::from(format!(
+ "{}{}",
+ scheme,
+ root_dir_path.join("child").to_string().replace('\\', "/")
+ ))])
+ .unwrap(),
+ ),
+ exclude: Default::default(),
},
predicate,
)
.unwrap();
- let expected: Vec<ModuleSpecifier> = [
- &format!("{root_dir_url}/child/README.md"),
- &format!("{root_dir_url}/child/e.mjs"),
- &format!("{root_dir_url}/child/f.mjsx"),
- ]
- .iter()
- .map(|f| ModuleSpecifier::parse(f).unwrap())
- .collect::<Vec<_>>();
+ let expected = vec![
+ format!("{root_dir_url}/child/README.md"),
+ format!("{root_dir_url}/child/e.mjs"),
+ format!("{root_dir_url}/child/f.mjsx"),
+ ];
- assert_eq!(result, expected);
+ assert_eq!(
+ result
+ .into_iter()
+ .map(|s| s.to_string())
+ .collect::<Vec<_>>(),
+ expected
+ );
}
#[tokio::test]
diff --git a/cli/util/glob.rs b/cli/util/glob.rs
index f0cabc2ec..7bd600167 100644
--- a/cli/util/glob.rs
+++ b/cli/util/glob.rs
@@ -5,30 +5,238 @@ use std::path::PathBuf;
use deno_core::anyhow::Context;
use deno_core::error::AnyError;
+use deno_core::normalize_path;
+use deno_core::url::Url;
+use indexmap::IndexMap;
-pub fn expand_globs(paths: Vec<PathBuf>) -> Result<Vec<PathBuf>, AnyError> {
- let mut new_paths = vec![];
- for path in paths {
- let path_str = path.to_string_lossy();
- if is_glob_pattern(&path_str) {
- let globbed_paths = glob(&path_str)?;
+use super::path::specifier_to_file_path;
+
+#[derive(Clone, Default, Debug, Eq, PartialEq)]
+pub struct FilePatterns {
+ pub include: Option<PathOrPatternSet>,
+ pub exclude: PathOrPatternSet,
+}
+
+impl FilePatterns {
+ pub fn matches_specifier(&self, specifier: &Url) -> bool {
+ let path = match specifier_to_file_path(specifier) {
+ Ok(path) => path,
+ Err(_) => return true,
+ };
+ self.matches_path(&path)
+ }
- for globbed_path_result in globbed_paths {
- new_paths.push(globbed_path_result?);
+ pub fn matches_path(&self, path: &Path) -> bool {
+ // Skip files in the exclude list.
+ if self.exclude.matches_path(path) {
+ return false;
+ }
+
+ // Ignore files not in the include list if it's present.
+ self
+ .include
+ .as_ref()
+ .map(|m| m.matches_path(path))
+ .unwrap_or(true)
+ }
+
+ /// Creates a collection of `FilePatterns` by base where the containing patterns
+ /// are only the ones applicable to the base.
+ ///
+ /// The order these are returned in is the order that the directory traversal
+ /// should occur in.
+ pub fn split_by_base(&self) -> Vec<(PathBuf, Self)> {
+ let Some(include) = &self.include else {
+ return Vec::new();
+ };
+
+ let mut include_paths = Vec::new();
+ let mut include_patterns = Vec::new();
+ for path_or_pattern in &include.0 {
+ match path_or_pattern {
+ PathOrPattern::Path(path) => include_paths.push((path.is_file(), path)),
+ PathOrPattern::Pattern(pattern) => include_patterns.push(pattern),
+ }
+ }
+ let include_patterns_by_base_path = include_patterns.into_iter().fold(
+ IndexMap::new(),
+ |mut map: IndexMap<_, Vec<_>>, p| {
+ map.entry(p.base_path()).or_default().push(p);
+ map
+ },
+ );
+ let exclude_by_base_path = self
+ .exclude
+ .0
+ .iter()
+ .map(|s| (s.base_path(), s))
+ .collect::<Vec<_>>();
+ let get_applicable_excludes =
+ |is_file_path: bool, base_path: &PathBuf| -> Vec<PathOrPattern> {
+ exclude_by_base_path
+ .iter()
+ .filter_map(|(exclude_base_path, exclude)| {
+ match exclude {
+ PathOrPattern::Path(exclude_path) => {
+ // For explicitly specified files, keep the exclude when the file's path starts
+ // with the exclude path. Regardless, keep excludes that are at or under the base path.
+ if is_file_path && base_path.starts_with(exclude_path)
+ || exclude_path.starts_with(base_path)
+ {
+ Some((*exclude).clone())
+ } else {
+ None
+ }
+ }
+ PathOrPattern::Pattern(_) => {
+ // include globs that are on a sub path or a parent path
+ if exclude_base_path.starts_with(base_path)
+ || base_path.starts_with(exclude_base_path)
+ {
+ Some((*exclude).clone())
+ } else {
+ None
+ }
+ }
+ }
+ })
+ .collect::<Vec<_>>()
+ };
+
+ let mut result = Vec::with_capacity(
+ include_paths.len() + include_patterns_by_base_path.len(),
+ );
+ for (is_file, path) in include_paths {
+ let applicable_excludes = get_applicable_excludes(is_file, path);
+ result.push((
+ path.clone(),
+ Self {
+ include: Some(PathOrPatternSet::new(vec![PathOrPattern::Path(
+ path.clone(),
+ )])),
+ exclude: PathOrPatternSet::new(applicable_excludes),
+ },
+ ));
+ }
+
+ // todo(dsherret): This could be further optimized by not including
+ // patterns that will only ever match another base.
+ for base_path in include_patterns_by_base_path.keys() {
+ let applicable_excludes = get_applicable_excludes(false, base_path);
+ let mut applicable_includes = Vec::new();
+ // get all patterns that apply to the current or ancestor directories
+ for path in base_path.ancestors() {
+ if let Some(patterns) = include_patterns_by_base_path.get(path) {
+ applicable_includes.extend(
+ patterns
+ .iter()
+ .map(|p| PathOrPattern::Pattern((*p).clone())),
+ );
+ }
}
- } else {
- new_paths.push(path);
+ result.push((
+ base_path.clone(),
+ Self {
+ include: Some(PathOrPatternSet::new(applicable_includes)),
+ exclude: PathOrPatternSet::new(applicable_excludes),
+ },
+ ));
}
+
+ // Sort by the longest base path first. This ensures that we visit opted into
+ // nested directories first before visiting the parent directory. The directory
+ // traverser will handle not going into directories it's already been in.
+ result.sort_by(|a, b| b.0.as_os_str().len().cmp(&a.0.as_os_str().len()));
+
+ result
+ }
+}
+
+#[derive(Clone, Default, Debug, Eq, PartialEq)]
+pub struct PathOrPatternSet(Vec<PathOrPattern>);
+
+impl PathOrPatternSet {
+ pub fn new(elements: Vec<PathOrPattern>) -> Self {
+ Self(elements)
+ }
+
+ pub fn from_absolute_paths(path: Vec<PathBuf>) -> Result<Self, AnyError> {
+ Ok(Self(
+ path
+ .into_iter()
+ .map(PathOrPattern::new)
+ .collect::<Result<Vec<_>, _>>()?,
+ ))
+ }
+
+ pub fn inner(&self) -> &Vec<PathOrPattern> {
+ &self.0
}
- Ok(new_paths)
+ pub fn into_path_or_patterns(self) -> Vec<PathOrPattern> {
+ self.0
+ }
+
+ pub fn matches_path(&self, path: &Path) -> bool {
+ self.0.iter().any(|p| p.matches_path(path))
+ }
+
+ pub fn base_paths(&self) -> Vec<PathBuf> {
+ let mut result = Vec::with_capacity(self.0.len());
+ for element in &self.0 {
+ match element {
+ PathOrPattern::Path(path) => {
+ result.push(path.to_path_buf());
+ }
+ PathOrPattern::Pattern(pattern) => {
+ result.push(pattern.base_path());
+ }
+ }
+ }
+ result
+ }
}
-pub fn glob(pattern: &str) -> Result<glob::Paths, AnyError> {
- glob::glob_with(&escape_brackets(pattern), match_options())
- .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum PathOrPattern {
+ Path(PathBuf),
+ Pattern(GlobPattern),
}
+impl PathOrPattern {
+ pub fn new(path: PathBuf) -> Result<Self, AnyError> {
+ let path_str = path.to_string_lossy();
+ // todo(dsherret): don't store URLs in PathBufs
+ if path_str.starts_with("http:")
+ || path_str.starts_with("https:")
+ || path_str.starts_with("file:")
+ {
+ return Ok(Self::Path(path));
+ }
+
+ GlobPattern::new_if_pattern(&path_str).map(|maybe_pattern| {
+ maybe_pattern
+ .map(PathOrPattern::Pattern)
+ .unwrap_or_else(|| PathOrPattern::Path(normalize_path(path)))
+ })
+ }
+
+ pub fn matches_path(&self, path: &Path) -> bool {
+ match self {
+ PathOrPattern::Path(p) => path.starts_with(p),
+ PathOrPattern::Pattern(p) => p.matches_path(path),
+ }
+ }
+
+ pub fn base_path(&self) -> PathBuf {
+ match self {
+ PathOrPattern::Path(p) => p.clone(),
+ PathOrPattern::Pattern(p) => p.base_path(),
+ }
+ }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq)]
pub struct GlobPattern(glob::Pattern);
impl GlobPattern {
@@ -40,35 +248,38 @@ impl GlobPattern {
}
pub fn new(pattern: &str) -> Result<Self, AnyError> {
- let pattern = glob::Pattern::new(pattern)
- .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))?;
+ let pattern =
+ glob::Pattern::new(&escape_brackets(pattern).replace('\\', "/"))
+ .with_context(|| format!("Failed to expand glob: \"{}\"", pattern))?;
Ok(Self(pattern))
}
pub fn matches_path(&self, path: &Path) -> bool {
- self.0.matches_path(path)
+ self.0.matches_path_with(path, match_options())
}
-}
-
-pub struct GlobSet(Vec<GlobPattern>);
-impl GlobSet {
- pub fn new(matchers: Vec<GlobPattern>) -> Self {
- Self(matchers)
- }
-
- pub fn matches_path(&self, path: &Path) -> bool {
- for pattern in &self.0 {
- if pattern.matches_path(path) {
- return true;
- }
- }
- false
+ pub fn base_path(&self) -> PathBuf {
+ let base_path = self
+ .0
+ .as_str()
+ .split('/')
+ .take_while(|c| !has_glob_chars(c))
+ .collect::<Vec<_>>()
+ .join(std::path::MAIN_SEPARATOR_STR);
+ PathBuf::from(base_path)
}
}
pub fn is_glob_pattern(path: &str) -> bool {
- path.chars().any(|c| matches!(c, '*' | '?'))
+ !path.starts_with("http:")
+ && !path.starts_with("https:")
+ && !path.starts_with("file:")
+ && has_glob_chars(path)
+}
+
+fn has_glob_chars(pattern: &str) -> bool {
+ // we don't support [ and ]
+ pattern.chars().any(|c| matches!(c, '*' | '?'))
}
fn escape_brackets(pattern: &str) -> String {
@@ -92,17 +303,161 @@ fn match_options() -> glob::MatchOptions {
#[cfg(test)]
mod test {
+ use pretty_assertions::assert_eq;
+ use test_util::TempDir;
+
use super::*;
+ // For easier comparisons in tests.
+ #[derive(Debug, PartialEq, Eq)]
+ struct ComparableFilePatterns {
+ include: Option<Vec<String>>,
+ exclude: Vec<String>,
+ }
+
+ impl ComparableFilePatterns {
+ pub fn new(root: &Path, file_patterns: &FilePatterns) -> Self {
+ fn path_or_pattern_to_string(root: &Path, p: &PathOrPattern) -> String {
+ match p {
+ PathOrPattern::Path(p) => p
+ .strip_prefix(root)
+ .unwrap()
+ .to_string_lossy()
+ .replace('\\', "/"),
+ PathOrPattern::Pattern(p) => p
+ .0
+ .as_str()
+ .strip_prefix(&format!(
+ "{}/",
+ root.to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap()
+ .to_string(),
+ }
+ }
+
+ Self {
+ include: file_patterns.include.as_ref().map(|p| {
+ p.0
+ .iter()
+ .map(|p| path_or_pattern_to_string(root, p))
+ .collect()
+ }),
+ exclude: file_patterns
+ .exclude
+ .0
+ .iter()
+ .map(|p| path_or_pattern_to_string(root, p))
+ .collect(),
+ }
+ }
+
+ pub fn from_split(
+ root: &Path,
+ patterns_by_base: &[(PathBuf, FilePatterns)],
+ ) -> Vec<(String, ComparableFilePatterns)> {
+ patterns_by_base
+ .iter()
+ .map(|(base_path, file_patterns)| {
+ (
+ base_path
+ .strip_prefix(root)
+ .unwrap()
+ .to_string_lossy()
+ .replace('\\', "/"),
+ ComparableFilePatterns::new(root, file_patterns),
+ )
+ })
+ .collect()
+ }
+ }
+
#[test]
- pub fn glob_set_matches_path() {
- let glob_set = GlobSet::new(vec![
- GlobPattern::new("foo/bar").unwrap(),
- GlobPattern::new("foo/baz").unwrap(),
- ]);
-
- assert!(glob_set.matches_path(Path::new("foo/bar")));
- assert!(glob_set.matches_path(Path::new("foo/baz")));
- assert!(!glob_set.matches_path(Path::new("foo/qux")));
+ fn should_split_globs_by_base_dir() {
+ let temp_dir = TempDir::new();
+ let patterns = FilePatterns {
+ include: Some(PathOrPatternSet::new(vec![
+ PathOrPattern::Pattern(
+ GlobPattern::new(&format!(
+ "{}/inner/**/*.ts",
+ temp_dir.path().to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap(),
+ ),
+ PathOrPattern::Pattern(
+ GlobPattern::new(&format!(
+ "{}/inner/sub/deeper/**/*.js",
+ temp_dir.path().to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap(),
+ ),
+ PathOrPattern::Pattern(
+ GlobPattern::new(&format!(
+ "{}/other/**/*.js",
+ temp_dir.path().to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap(),
+ ),
+ PathOrPattern::Path(temp_dir.path().join("sub/file.ts").to_path_buf()),
+ ])),
+ exclude: PathOrPatternSet::new(vec![
+ PathOrPattern::Pattern(
+ GlobPattern::new(&format!(
+ "{}/inner/other/**/*.ts",
+ temp_dir.path().to_string_lossy().replace('\\', "/")
+ ))
+ .unwrap(),
+ ),
+ PathOrPattern::Path(
+ temp_dir
+ .path()
+ .join("inner/sub/deeper/file.js")
+ .to_path_buf(),
+ ),
+ ]),
+ };
+ let split = ComparableFilePatterns::from_split(
+ temp_dir.path().as_path(),
+ &patterns.split_by_base(),
+ );
+ assert_eq!(
+ split,
+ vec![
+ (
+ "inner/sub/deeper".to_string(),
+ ComparableFilePatterns {
+ include: Some(vec![
+ "inner/sub/deeper/**/*.js".to_string(),
+ "inner/**/*.ts".to_string(),
+ ]),
+ exclude: vec!["inner/sub/deeper/file.js".to_string()],
+ }
+ ),
+ (
+ "sub/file.ts".to_string(),
+ ComparableFilePatterns {
+ include: Some(vec!["sub/file.ts".to_string()]),
+ exclude: vec![],
+ }
+ ),
+ (
+ "inner".to_string(),
+ ComparableFilePatterns {
+ include: Some(vec!["inner/**/*.ts".to_string()]),
+ exclude: vec![
+ "inner/other/**/*.ts".to_string(),
+ "inner/sub/deeper/file.js".to_string(),
+ ],
+ }
+ ),
+ (
+ "other".to_string(),
+ ComparableFilePatterns {
+ include: Some(vec!["other/**/*.js".to_string()]),
+ exclude: vec![],
+ }
+ )
+ ]
+ );
}
}