summary refs log tree commit diff
path: root/std/path/glob.ts
diff options
context:
space:
mode:
Diffstat (limited to 'std/path/glob.ts')
-rw-r--r-- std/path/glob.ts 387
1 files changed, 0 insertions, 387 deletions
diff --git a/std/path/glob.ts b/std/path/glob.ts
deleted file mode 100644
index d599c1b88..000000000
--- a/std/path/glob.ts
+++ /dev/null
@@ -1,387 +0,0 @@
-// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
-// This module is browser compatible.
-
-import { osType } from "../_util/os.ts";
-import { join, normalize } from "./mod.ts";
-import { SEP, SEP_PATTERN } from "./separator.ts";
-
-export interface GlobOptions {
- /** Extended glob syntax.
- * See https://www.linuxjournal.com/content/bash-extended-globbing. Defaults
- * to true. */
- extended?: boolean;
- /** Globstar syntax.
- * See https://www.linuxjournal.com/content/globstar-new-bash-globbing-option.
- * If false, `**` is treated like `*`. Defaults to true. */
- globstar?: boolean;
- /** Operating system. Defaults to the native OS. */
- os?: typeof Deno.build.os;
-}
-
-export type GlobToRegExpOptions = GlobOptions;
-
-// deno-fmt-ignore
-const regExpEscapeChars = ["!", "$", "(", ")", "*", "+", ".", "=", "?", "[", "\\", "^", "{", "|"];
-const rangeEscapeChars = ["-", "\\", "]"];
-
-/** Convert a glob string to a regular expression.
- *
- * Tries to match bash glob expansion as closely as possible.
- *
- * Basic glob syntax:
- * - `*` - Matches everything without leaving the path segment.
- * - `{foo,bar}` - Matches `foo` or `bar`.
- * - `[abcd]` - Matches `a`, `b`, `c` or `d`.
- * - `[a-d]` - Matches `a`, `b`, `c` or `d`.
- * - `[!abcd]` - Matches any single character besides `a`, `b`, `c` or `d`.
- * - `[[:<class>:]]` - Matches any character belonging to `<class>`.
- * - `[[:alnum:]]` - Matches any digit or letter.
- * - `[[:digit:]abc]` - Matches any digit, `a`, `b` or `c`.
- * - See https://facelessuser.github.io/wcmatch/glob/#posix-character-classes
- * for a complete list of supported character classes.
- * - `\` - Escapes the next character for an `os` other than `"windows"`.
- * - `` ` `` - Escapes the next character for `os` set to `"windows"`.
- * - `/` - Path separator.
- * - `\` - Additional path separator only for `os` set to `"windows"`.
- *
- * Extended syntax:
- * - Requires `{ extended: true }`.
- * - `?(foo|bar)` - Matches 0 or 1 instance of `{foo,bar}`.
- * - `@(foo|bar)` - Matches 1 instance of `{foo,bar}`. They behave the same.
- * - `*(foo|bar)` - Matches _n_ instances of `{foo,bar}`.
- * - `+(foo|bar)` - Matches _n > 0_ instances of `{foo,bar}`.
- * - `!(foo|bar)` - Matches anything other than `{foo,bar}`.
- * - See https://www.linuxjournal.com/content/bash-extended-globbing.
- *
- * Globstar syntax:
- * - Requires `{ globstar: true }`.
- * - `**` - Matches any number of any path segments.
- * - Must comprise its entire path segment in the provided glob.
- * - See https://www.linuxjournal.com/content/globstar-new-bash-globbing-option.
- *
- * Note the following properties:
- * - The generated `RegExp` is anchored at both start and end.
- * - Repeating and trailing separators are tolerated. Trailing separators in the
- * provided glob have no meaning and are discarded.
- * - Absolute globs will only match absolute paths, etc.
- * - Empty globs will match nothing.
- * - Any special glob syntax must be contained to one path segment. For example,
- * `?(foo|bar/baz)` is invalid. The separator will take precedence and the
- * first segment ends with an unclosed group.
- * - If a path segment ends with unclosed groups or a dangling escape prefix, a
- * parse error has occurred. Every character for that segment is taken
- * literally in this event.
- *
- * Limitations:
- * - A negative group like `!(foo|bar)` will wrongly be converted to a negative
- * look-ahead followed by a wildcard. This means that `!(foo).js` will wrongly
- * fail to match `foobar.js`, even though `foobar` is not `foo`. Effectively,
- * `!(foo|bar)` is treated like `!(@(foo|bar)*)`. This works correctly if
- * the group is not nested and occurs at the end of the segment. */
-export function globToRegExp(
- glob: string,
- { extended = true, globstar: globstarOption = true, os = osType }:
- GlobToRegExpOptions = {},
-): RegExp {
- if (glob == "") {
- return /(?!)/;
- }
-
- const sep = os == "windows" ? "(?:\\\\|/)+" : "/+";
- const sepMaybe = os == "windows" ? "(?:\\\\|/)*" : "/*";
- const seps = os == "windows" ? ["\\", "/"] : ["/"];
- const globstar = os == "windows"
- ? "(?:[^\\\\/]*(?:\\\\|/|$)+)*"
- : "(?:[^/]*(?:/|$)+)*";
- const wildcard = os == "windows" ? "[^\\\\/]*" : "[^/]*";
- const escapePrefix = os == "windows" ? "`" : "\\";
-
- // Remove trailing separators.
- let newLength = glob.length;
- for (; newLength > 1 && seps.includes(glob[newLength - 1]); newLength--);
- glob = glob.slice(0, newLength);
-
- let regExpString = "";
-
- // Terminates correctly. Trust that `j` is incremented every iteration.
- for (let j = 0; j < glob.length;) {
- let segment = "";
- const groupStack = [];
- let inRange = false;
- let inEscape = false;
- let endsWithSep = false;
- let i = j;
-
- // Terminates with `i` at the non-inclusive end of the current segment.
- for (; i < glob.length && !seps.includes(glob[i]); i++) {
- if (inEscape) {
- inEscape = false;
- const escapeChars = inRange ? rangeEscapeChars : regExpEscapeChars;
- segment += escapeChars.includes(glob[i]) ? `\\${glob[i]}` : glob[i];
- continue;
- }
-
- if (glob[i] == escapePrefix) {
- inEscape = true;
- continue;
- }
-
- if (glob[i] == "[") {
- if (!inRange) {
- inRange = true;
- segment += "[";
- if (glob[i + 1] == "!") {
- i++;
- segment += "^";
- } else if (glob[i + 1] == "^") {
- i++;
- segment += "\\^";
- }
- continue;
- } else if (glob[i + 1] == ":") {
- let k = i + 1;
- let value = "";
- while (glob[k + 1] != null && glob[k + 1] != ":") {
- value += glob[k + 1];
- k++;
- }
- if (glob[k + 1] == ":" && glob[k + 2] == "]") {
- i = k + 2;
- if (value == "alnum") segment += "\\dA-Za-z";
- else if (value == "alpha") segment += "A-Za-z";
- else if (value == "ascii") segment += "\x00-\x7F";
- else if (value == "blank") segment += "\t ";
- else if (value == "cntrl") segment += "\x00-\x1F\x7F";
- else if (value == "digit") segment += "\\d";
- else if (value == "graph") segment += "\x21-\x7E";
- else if (value == "lower") segment += "a-z";
- else if (value == "print") segment += "\x20-\x7E";
- else if (value == "punct") {
- segment += "!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_‘{|}~";
- } else if (value == "space") segment += "\\s\v";
- else if (value == "upper") segment += "A-Z";
- else if (value == "word") segment += "\\w";
- else if (value == "xdigit") segment += "\\dA-Fa-f";
- continue;
- }
- }
- }
-
- if (glob[i] == "]" && inRange) {
- inRange = false;
- segment += "]";
- continue;
- }
-
- if (inRange) {
- if (glob[i] == "\\") {
- segment += `\\\\`;
- } else {
- segment += glob[i];
- }
- continue;
- }
-
- if (
- glob[i] == ")" && groupStack.length > 0 &&
- groupStack[groupStack.length - 1] != "BRACE"
- ) {
- segment += ")";
- const type = groupStack.pop()!;
- if (type == "!") {
- segment += wildcard;
- } else if (type != "@") {
- segment += type;
- }
- continue;
- }
-
- if (
- glob[i] == "|" && groupStack.length > 0 &&
- groupStack[groupStack.length - 1] != "BRACE"
- ) {
- segment += "|";
- continue;
- }
-
- if (glob[i] == "+" && extended && glob[i + 1] == "(") {
- i++;
- groupStack.push("+");
- segment += "(?:";
- continue;
- }
-
- if (glob[i] == "@" && extended && glob[i + 1] == "(") {
- i++;
- groupStack.push("@");
- segment += "(?:";
- continue;
- }
-
- if (glob[i] == "?") {
- if (extended && glob[i + 1] == "(") {
- i++;
- groupStack.push("?");
- segment += "(?:";
- } else {
- segment += ".";
- }
- continue;
- }
-
- if (glob[i] == "!" && extended && glob[i + 1] == "(") {
- i++;
- groupStack.push("!");
- segment += "(?!";
- continue;
- }
-
- if (glob[i] == "{") {
- groupStack.push("BRACE");
- segment += "(?:";
- continue;
- }
-
- if (glob[i] == "}" && groupStack[groupStack.length - 1] == "BRACE") {
- groupStack.pop();
- segment += ")";
- continue;
- }
-
- if (glob[i] == "," && groupStack[groupStack.length - 1] == "BRACE") {
- segment += "|";
- continue;
- }
-
- if (glob[i] == "*") {
- if (extended && glob[i + 1] == "(") {
- i++;
- groupStack.push("*");
- segment += "(?:";
- } else {
- const prevChar = glob[i - 1];
- let numStars = 1;
- while (glob[i + 1] == "*") {
- i++;
- numStars++;
- }
- const nextChar = glob[i + 1];
- if (
- globstarOption && numStars == 2 &&
- [...seps, undefined].includes(prevChar) &&
- [...seps, undefined].includes(nextChar)
- ) {
- segment += globstar;
- endsWithSep = true;
- } else {
- segment += wildcard;
- }
- }
- continue;
- }
-
- segment += regExpEscapeChars.includes(glob[i]) ? `\\${glob[i]}` : glob[i];
- }
-
- // Check for unclosed groups, an unclosed range, or a dangling escape prefix.
- if (groupStack.length > 0 || inRange || inEscape) {
- // Parse failure. Take all characters from this segment literally.
- segment = "";
- for (const c of glob.slice(j, i)) {
- segment += regExpEscapeChars.includes(c) ? `\\${c}` : c;
- endsWithSep = false;
- }
- }
-
- regExpString += segment;
- if (!endsWithSep) {
- regExpString += i < glob.length ? sep : sepMaybe;
- endsWithSep = true;
- }
-
- // Terminates with `i` at the start of the next segment.
- while (seps.includes(glob[i])) i++;
-
- // Check that the next value of `j` is indeed higher than the current value.
- if (!(i > j)) {
- throw new Error("Assertion failure: i > j (potential infinite loop)");
- }
- j = i;
- }
-
- regExpString = `^${regExpString}$`;
- return new RegExp(regExpString);
-}
-
-/** Test whether the given string is a glob */
-export function isGlob(str: string): boolean {
- const chars: Record<string, string> = { "{": "}", "(": ")", "[": "]" };
- const regex =
- /\\(.)|(^!|\*|[\].+)]\?|\[[^\\\]]+\]|\{[^\\}]+\}|\(\?[:!=][^\\)]+\)|\([^|]+\|[^\\)]+\))/;
-
- if (str === "") {
- return false;
- }
-
- let match: RegExpExecArray | null;
-
- while ((match = regex.exec(str))) {
- if (match[2]) return true;
- let idx = match.index + match[0].length;
-
- // if an open bracket/brace/paren is escaped,
- // set the index to the next closing character
- const open = match[1];
- const close = open ? chars[open] : null;
- if (open && close) {
- const n = str.indexOf(close, idx);
- if (n !== -1) {
- idx = n + 1;
- }
- }
-
- str = str.slice(idx);
- }
-
- return false;
-}
-
-/** Like normalize(), but doesn't collapse "**\/.." when `globstar` is true. */
-export function normalizeGlob(
- glob: string,
- { globstar = false }: GlobOptions = {},
-): string {
- if (glob.match(/\0/g)) {
- throw new Error(`Glob contains invalid characters: "${glob}"`);
- }
- if (!globstar) {
- return normalize(glob);
- }
- const s = SEP_PATTERN.source;
- const badParentPattern = new RegExp(
- `(?<=(${s}|^)\\*\\*${s})\\.\\.(?=${s}|$)`,
- "g",
- );
- return normalize(glob.replace(badParentPattern, "\0")).replace(/\0/g, "..");
-}
-
-/** Like join(), but doesn't collapse "**\/.." when `globstar` is true. */
-export function joinGlobs(
- globs: string[],
- { extended = false, globstar = false }: GlobOptions = {},
-): string {
- if (!globstar || globs.length == 0) {
- return join(...globs);
- }
- if (globs.length === 0) return ".";
- let joined: string | undefined;
- for (const glob of globs) {
- const path = glob;
- if (path.length > 0) {
- if (!joined) joined = path;
- else joined += `${SEP}${path}`;
- }
- }
- if (!joined) return ".";
- return normalizeGlob(joined, { extended, globstar });
-}