diff options
Diffstat (limited to 'ext/node/polyfills/path/glob.ts')
-rw-r--r-- | ext/node/polyfills/path/glob.ts | 420 |
1 files changed, 420 insertions, 0 deletions
diff --git a/ext/node/polyfills/path/glob.ts b/ext/node/polyfills/path/glob.ts new file mode 100644 index 000000000..c0da29b9f --- /dev/null +++ b/ext/node/polyfills/path/glob.ts @@ -0,0 +1,420 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +import { isWindows, osType } from "internal:deno_node/polyfills/_util/os.ts"; +import { + SEP, + SEP_PATTERN, +} from "internal:deno_node/polyfills/path/separator.ts"; +import * as _win32 from "internal:deno_node/polyfills/path/win32.ts"; +import * as _posix from "internal:deno_node/polyfills/path/posix.ts"; +import type { OSType } from "internal:deno_node/polyfills/_util/os.ts"; + +const path = isWindows ? _win32 : _posix; +const { join, normalize } = path; + +export interface GlobOptions { + /** Extended glob syntax. + * See https://www.linuxjournal.com/content/bash-extended-globbing. + * + * @default {true} + */ + extended?: boolean; + /** Globstar syntax. + * See https://www.linuxjournal.com/content/globstar-new-bash-globbing-option. + * If false, `**` is treated like `*`. + * + * @default {true} + */ + globstar?: boolean; + /** Whether globstar should be case-insensitive. */ + caseInsensitive?: boolean; + /** Operating system. Defaults to the native OS. */ + os?: OSType; +} + +export type GlobToRegExpOptions = GlobOptions; + +const regExpEscapeChars = [ + "!", + "$", + "(", + ")", + "*", + "+", + ".", + "=", + "?", + "[", + "\\", + "^", + "{", + "|", +]; +const rangeEscapeChars = ["-", "\\", "]"]; + +/** Convert a glob string to a regular expression. + * + * Tries to match bash glob expansion as closely as possible. + * + * Basic glob syntax: + * - `*` - Matches everything without leaving the path segment. + * - `?` - Matches any single character. + * - `{foo,bar}` - Matches `foo` or `bar`. + * - `[abcd]` - Matches `a`, `b`, `c` or `d`. + * - `[a-d]` - Matches `a`, `b`, `c` or `d`. + * - `[!abcd]` - Matches any single character besides `a`, `b`, `c` or `d`. + * - `[[:<class>:]]` - Matches any character belonging to `<class>`. + * - `[[:alnum:]]` - Matches any digit or letter. + * - `[[:digit:]abc]` - Matches any digit, `a`, `b` or `c`. + * - See https://facelessuser.github.io/wcmatch/glob/#posix-character-classes + * for a complete list of supported character classes. + * - `\` - Escapes the next character for an `os` other than `"windows"`. + * - \` - Escapes the next character for `os` set to `"windows"`. + * - `/` - Path separator. + * - `\` - Additional path separator only for `os` set to `"windows"`. + * + * Extended syntax: + * - Requires `{ extended: true }`. + * - `?(foo|bar)` - Matches 0 or 1 instance of `{foo,bar}`. + * - `@(foo|bar)` - Matches 1 instance of `{foo,bar}`. They behave the same. + * - `*(foo|bar)` - Matches _n_ instances of `{foo,bar}`. + * - `+(foo|bar)` - Matches _n > 0_ instances of `{foo,bar}`. + * - `!(foo|bar)` - Matches anything other than `{foo,bar}`. + * - See https://www.linuxjournal.com/content/bash-extended-globbing. + * + * Globstar syntax: + * - Requires `{ globstar: true }`. + * - `**` - Matches any number of any path segments. + * - Must comprise its entire path segment in the provided glob. + * - See https://www.linuxjournal.com/content/globstar-new-bash-globbing-option. + * + * Note the following properties: + * - The generated `RegExp` is anchored at both start and end. + * - Repeating and trailing separators are tolerated. Trailing separators in the + * provided glob have no meaning and are discarded. + * - Absolute globs will only match absolute paths, etc. + * - Empty globs will match nothing. + * - Any special glob syntax must be contained to one path segment. For example, + * `?(foo|bar/baz)` is invalid. The separator will take precedence and the + * first segment ends with an unclosed group. + * - If a path segment ends with unclosed groups or a dangling escape prefix, a + * parse error has occurred. Every character for that segment is taken + * literally in this event. + * + * Limitations: + * - A negative group like `!(foo|bar)` will wrongly be converted to a negative + * look-ahead followed by a wildcard. This means that `!(foo).js` will wrongly + * fail to match `foobar.js`, even though `foobar` is not `foo`. Effectively, + * `!(foo|bar)` is treated like `!(@(foo|bar)*)`. This will work correctly if + * the group occurs not nested at the end of the segment. */ +export function globToRegExp( + glob: string, + { + extended = true, + globstar: globstarOption = true, + os = osType, + caseInsensitive = false, + }: GlobToRegExpOptions = {}, +): RegExp { + if (glob == "") { + return /(?!)/; + } + + const sep = os == "windows" ? "(?:\\\\|/)+" : "/+"; + const sepMaybe = os == "windows" ? "(?:\\\\|/)*" : "/*"; + const seps = os == "windows" ? ["\\", "/"] : ["/"]; + const globstar = os == "windows" + ? "(?:[^\\\\/]*(?:\\\\|/|$)+)*" + : "(?:[^/]*(?:/|$)+)*"; + const wildcard = os == "windows" ? "[^\\\\/]*" : "[^/]*"; + const escapePrefix = os == "windows" ? "`" : "\\"; + + // Remove trailing separators. + let newLength = glob.length; + for (; newLength > 1 && seps.includes(glob[newLength - 1]); newLength--); + glob = glob.slice(0, newLength); + + let regExpString = ""; + + // Terminates correctly. Trust that `j` is incremented every iteration. + for (let j = 0; j < glob.length;) { + let segment = ""; + const groupStack: string[] = []; + let inRange = false; + let inEscape = false; + let endsWithSep = false; + let i = j; + + // Terminates with `i` at the non-inclusive end of the current segment. + for (; i < glob.length && !seps.includes(glob[i]); i++) { + if (inEscape) { + inEscape = false; + const escapeChars = inRange ? rangeEscapeChars : regExpEscapeChars; + segment += escapeChars.includes(glob[i]) ? `\\${glob[i]}` : glob[i]; + continue; + } + + if (glob[i] == escapePrefix) { + inEscape = true; + continue; + } + + if (glob[i] == "[") { + if (!inRange) { + inRange = true; + segment += "["; + if (glob[i + 1] == "!") { + i++; + segment += "^"; + } else if (glob[i + 1] == "^") { + i++; + segment += "\\^"; + } + continue; + } else if (glob[i + 1] == ":") { + let k = i + 1; + let value = ""; + while (glob[k + 1] != null && glob[k + 1] != ":") { + value += glob[k + 1]; + k++; + } + if (glob[k + 1] == ":" && glob[k + 2] == "]") { + i = k + 2; + if (value == "alnum") segment += "\\dA-Za-z"; + else if (value == "alpha") segment += "A-Za-z"; + else if (value == "ascii") segment += "\x00-\x7F"; + else if (value == "blank") segment += "\t "; + else if (value == "cntrl") segment += "\x00-\x1F\x7F"; + else if (value == "digit") segment += "\\d"; + else if (value == "graph") segment += "\x21-\x7E"; + else if (value == "lower") segment += "a-z"; + else if (value == "print") segment += "\x20-\x7E"; + else if (value == "punct") { + segment += "!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_‘{|}~"; + } else if (value == "space") segment += "\\s\v"; + else if (value == "upper") segment += "A-Z"; + else if (value == "word") segment += "\\w"; + else if (value == "xdigit") segment += "\\dA-Fa-f"; + continue; + } + } + } + + if (glob[i] == "]" && inRange) { + inRange = false; + segment += "]"; + continue; + } + + if (inRange) { + if (glob[i] == "\\") { + segment += `\\\\`; + } else { + segment += glob[i]; + } + continue; + } + + if ( + glob[i] == ")" && groupStack.length > 0 && + groupStack[groupStack.length - 1] != "BRACE" + ) { + segment += ")"; + const type = groupStack.pop()!; + if (type == "!") { + segment += wildcard; + } else if (type != "@") { + segment += type; + } + continue; + } + + if ( + glob[i] == "|" && groupStack.length > 0 && + groupStack[groupStack.length - 1] != "BRACE" + ) { + segment += "|"; + continue; + } + + if (glob[i] == "+" && extended && glob[i + 1] == "(") { + i++; + groupStack.push("+"); + segment += "(?:"; + continue; + } + + if (glob[i] == "@" && extended && glob[i + 1] == "(") { + i++; + groupStack.push("@"); + segment += "(?:"; + continue; + } + + if (glob[i] == "?") { + if (extended && glob[i + 1] == "(") { + i++; + groupStack.push("?"); + segment += "(?:"; + } else { + segment += "."; + } + continue; + } + + if (glob[i] == "!" && extended && glob[i + 1] == "(") { + i++; + groupStack.push("!"); + segment += "(?!"; + continue; + } + + if (glob[i] == "{") { + groupStack.push("BRACE"); + segment += "(?:"; + continue; + } + + if (glob[i] == "}" && groupStack[groupStack.length - 1] == "BRACE") { + groupStack.pop(); + segment += ")"; + continue; + } + + if (glob[i] == "," && groupStack[groupStack.length - 1] == "BRACE") { + segment += "|"; + continue; + } + + if (glob[i] == "*") { + if (extended && glob[i + 1] == "(") { + i++; + groupStack.push("*"); + segment += "(?:"; + } else { + const prevChar = glob[i - 1]; + let numStars = 1; + while (glob[i + 1] == "*") { + i++; + numStars++; + } + const nextChar = glob[i + 1]; + if ( + globstarOption && numStars == 2 && + [...seps, undefined].includes(prevChar) && + [...seps, undefined].includes(nextChar) + ) { + segment += globstar; + endsWithSep = true; + } else { + segment += wildcard; + } + } + continue; + } + + segment += regExpEscapeChars.includes(glob[i]) ? `\\${glob[i]}` : glob[i]; + } + + // Check for unclosed groups or a dangling backslash. + if (groupStack.length > 0 || inRange || inEscape) { + // Parse failure. Take all characters from this segment literally. + segment = ""; + for (const c of glob.slice(j, i)) { + segment += regExpEscapeChars.includes(c) ? `\\${c}` : c; + endsWithSep = false; + } + } + + regExpString += segment; + if (!endsWithSep) { + regExpString += i < glob.length ? sep : sepMaybe; + endsWithSep = true; + } + + // Terminates with `i` at the start of the next segment. + while (seps.includes(glob[i])) i++; + + // Check that the next value of `j` is indeed higher than the current value. + if (!(i > j)) { + throw new Error("Assertion failure: i > j (potential infinite loop)"); + } + j = i; + } + + regExpString = `^${regExpString}$`; + return new RegExp(regExpString, caseInsensitive ? "i" : ""); +} + +/** Test whether the given string is a glob */ +export function isGlob(str: string): boolean { + const chars: Record<string, string> = { "{": "}", "(": ")", "[": "]" }; + const regex = + /\\(.)|(^!|\*|\?|[\].+)]\?|\[[^\\\]]+\]|\{[^\\}]+\}|\(\?[:!=][^\\)]+\)|\([^|]+\|[^\\)]+\))/; + + if (str === "") { + return false; + } + + let match: RegExpExecArray | null; + + while ((match = regex.exec(str))) { + if (match[2]) return true; + let idx = match.index + match[0].length; + + // if an open bracket/brace/paren is escaped, + // set the index to the next closing character + const open = match[1]; + const close = open ? chars[open] : null; + if (open && close) { + const n = str.indexOf(close, idx); + if (n !== -1) { + idx = n + 1; + } + } + + str = str.slice(idx); + } + + return false; +} + +/** Like normalize(), but doesn't collapse "**\/.." when `globstar` is true. */ +export function normalizeGlob( + glob: string, + { globstar = false }: GlobOptions = {}, +): string { + if (glob.match(/\0/g)) { + throw new Error(`Glob contains invalid characters: "${glob}"`); + } + if (!globstar) { + return normalize(glob); + } + const s = SEP_PATTERN.source; + const badParentPattern = new RegExp( + `(?<=(${s}|^)\\*\\*${s})\\.\\.(?=${s}|$)`, + "g", + ); + return normalize(glob.replace(badParentPattern, "\0")).replace(/\0/g, ".."); +} + +/** Like join(), but doesn't collapse "**\/.." when `globstar` is true. */ +export function joinGlobs( + globs: string[], + { extended = true, globstar = false }: GlobOptions = {}, +): string { + if (!globstar || globs.length == 0) { + return join(...globs); + } + if (globs.length === 0) return "."; + let joined: string | undefined; + for (const glob of globs) { + const path = glob; + if (path.length > 0) { + if (!joined) joined = path; + else joined += `${SEP}${path}`; + } + } + if (!joined) return "."; + return normalizeGlob(joined, { extended, globstar }); +} |