summaryrefslogtreecommitdiff
path: root/ext/node/analyze.rs
diff options
context:
space:
mode:
authorDavid Sherret <dsherret@users.noreply.github.com>2023-04-21 16:38:10 -0400
committerGitHub <noreply@github.com>2023-04-21 16:38:10 -0400
commit4a33c349afd4b2728eb8c3c29676651353282d3b (patch)
treea21ff79a51f9842347bab48051f5987f53bd018a /ext/node/analyze.rs
parent065d8771adfae6aa75cdd367741468c823fbae4a (diff)
refactor: move some CJS and ESM code analysis to ext/node (#18789)
Diffstat (limited to 'ext/node/analyze.rs')
-rw-r--r--ext/node/analyze.rs564
1 files changed, 564 insertions, 0 deletions
diff --git a/ext/node/analyze.rs b/ext/node/analyze.rs
new file mode 100644
index 000000000..03bf41995
--- /dev/null
+++ b/ext/node/analyze.rs
@@ -0,0 +1,564 @@
+// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
+
+use std::collections::HashSet;
+use std::collections::VecDeque;
+use std::fmt::Write;
+use std::path::Path;
+use std::path::PathBuf;
+
+use deno_core::anyhow::Context;
+use deno_core::ModuleSpecifier;
+use once_cell::sync::Lazy;
+
+use deno_core::error::AnyError;
+
+use crate::package_exports_resolve;
+use crate::NodeFs;
+use crate::NodeModuleKind;
+use crate::NodePermissions;
+use crate::NodeResolutionMode;
+use crate::PackageJson;
+use crate::PathClean;
+use crate::RequireNpmResolver;
+use crate::NODE_GLOBAL_THIS_NAME;
+
+static NODE_GLOBALS: &[&str] = &[
+ "Buffer",
+ "clearImmediate",
+ "clearInterval",
+ "clearTimeout",
+ "console",
+ "global",
+ "process",
+ "setImmediate",
+ "setInterval",
+ "setTimeout",
+];
+
+#[derive(Debug, Clone)]
+pub struct CjsAnalysis {
+ pub exports: Vec<String>,
+ pub reexports: Vec<String>,
+}
+
+/// Code analyzer for CJS and ESM files.
+pub trait CjsEsmCodeAnalyzer {
+ /// Analyzes CommonJs code for exports and reexports, which is
+ /// then used to determine the wrapper ESM module exports.
+ fn analyze_cjs(
+ &self,
+ specifier: &ModuleSpecifier,
+ source: &str,
+ ) -> Result<CjsAnalysis, AnyError>;
+
+ /// Analyzes ESM code for top level declarations. This is used
+ /// to help inform injecting node specific globals into Node ESM
+ /// code. For example, if a top level `setTimeout` function exists
+ /// then we don't want to inject a `setTimeout` declaration.
+ ///
+ /// Note: This will go away in the future once we do this all in v8.
+ fn analyze_esm_top_level_decls(
+ &self,
+ specifier: &ModuleSpecifier,
+ source: &str,
+ ) -> Result<HashSet<String>, AnyError>;
+}
+
+pub struct NodeCodeTranslator<
+ TCjsEsmCodeAnalyzer: CjsEsmCodeAnalyzer,
+ TRequireNpmResolver: RequireNpmResolver,
+> {
+ cjs_esm_code_analyzer: TCjsEsmCodeAnalyzer,
+ npm_resolver: TRequireNpmResolver,
+}
+
+impl<
+ TCjsEsmCodeAnalyzer: CjsEsmCodeAnalyzer,
+ TRequireNpmResolver: RequireNpmResolver,
+ > NodeCodeTranslator<TCjsEsmCodeAnalyzer, TRequireNpmResolver>
+{
+ pub fn new(
+ cjs_esm_code_analyzer: TCjsEsmCodeAnalyzer,
+ npm_resolver: TRequireNpmResolver,
+ ) -> Self {
+ Self {
+ cjs_esm_code_analyzer,
+ npm_resolver,
+ }
+ }
+
+ /// Resolves the code to be used when executing Node specific ESM code.
+ ///
+ /// Note: This will go away in the future once we do this all in v8.
+ pub fn esm_code_with_node_globals(
+ &self,
+ specifier: &ModuleSpecifier,
+ source: &str,
+ ) -> Result<String, AnyError> {
+ let top_level_decls = self
+ .cjs_esm_code_analyzer
+ .analyze_esm_top_level_decls(specifier, source)?;
+ Ok(esm_code_from_top_level_decls(source, &top_level_decls))
+ }
+
+ /// Translates given CJS module into ESM. This function will perform static
+ /// analysis on the file to find defined exports and reexports.
+ ///
+ /// For all discovered reexports the analysis will be performed recursively.
+ ///
+ /// If successful a source code for equivalent ES module is returned.
+ pub fn translate_cjs_to_esm<Fs: NodeFs>(
+ &self,
+ specifier: &ModuleSpecifier,
+ source: &str,
+ permissions: &mut dyn NodePermissions,
+ ) -> Result<String, AnyError> {
+ let mut temp_var_count = 0;
+ let mut handled_reexports: HashSet<String> = HashSet::default();
+
+ let analysis = self.cjs_esm_code_analyzer.analyze_cjs(specifier, source)?;
+
+ let mut source = vec![
+ r#"import {createRequire as __internalCreateRequire} from "node:module";
+ const require = __internalCreateRequire(import.meta.url);"#
+ .to_string(),
+ ];
+
+ let mut all_exports = analysis
+ .exports
+ .iter()
+ .map(|s| s.to_string())
+ .collect::<HashSet<_>>();
+
+ // (request, referrer)
+ let mut reexports_to_handle = VecDeque::new();
+ for reexport in analysis.reexports {
+ reexports_to_handle.push_back((reexport, specifier.clone()));
+ }
+
+ while let Some((reexport, referrer)) = reexports_to_handle.pop_front() {
+ if handled_reexports.contains(&reexport) {
+ continue;
+ }
+
+ handled_reexports.insert(reexport.to_string());
+
+ // First, resolve relate reexport specifier
+ let resolved_reexport = self.resolve::<Fs>(
+ &reexport,
+ &referrer,
+ // FIXME(bartlomieju): check if these conditions are okay, probably
+ // should be `deno-require`, because `deno` is already used in `esm_resolver.rs`
+ &["deno", "require", "default"],
+ NodeResolutionMode::Execution,
+ permissions,
+ )?;
+ // Second, read the source code from disk
+ let reexport_specifier =
+ ModuleSpecifier::from_file_path(&resolved_reexport).unwrap();
+ let reexport_file_text = Fs::read_to_string(&resolved_reexport)
+ .with_context(|| {
+ format!(
+ "Could not find '{}' ({}) referenced from {}",
+ reexport, reexport_specifier, referrer
+ )
+ })?;
+ {
+ let analysis = self
+ .cjs_esm_code_analyzer
+ .analyze_cjs(&reexport_specifier, &reexport_file_text)?;
+
+ for reexport in analysis.reexports {
+ reexports_to_handle.push_back((reexport, reexport_specifier.clone()));
+ }
+
+ all_exports.extend(
+ analysis
+ .exports
+ .into_iter()
+ .filter(|e| e.as_str() != "default"),
+ );
+ }
+ }
+
+ source.push(format!(
+ "const mod = require(\"{}\");",
+ specifier
+ .to_file_path()
+ .unwrap()
+ .to_str()
+ .unwrap()
+ .replace('\\', "\\\\")
+ .replace('\'', "\\\'")
+ .replace('\"', "\\\"")
+ ));
+
+ for export in &all_exports {
+ if export.as_str() != "default" {
+ add_export(
+ &mut source,
+ export,
+ &format!("mod[\"{export}\"]"),
+ &mut temp_var_count,
+ );
+ }
+ }
+
+ source.push("export default mod;".to_string());
+
+ let translated_source = source.join("\n");
+ Ok(translated_source)
+ }
+
+ fn resolve<Fs: NodeFs>(
+ &self,
+ specifier: &str,
+ referrer: &ModuleSpecifier,
+ conditions: &[&str],
+ mode: NodeResolutionMode,
+ permissions: &mut dyn NodePermissions,
+ ) -> Result<PathBuf, AnyError> {
+ if specifier.starts_with('/') {
+ todo!();
+ }
+
+ let referrer_path = referrer.to_file_path().unwrap();
+ if specifier.starts_with("./") || specifier.starts_with("../") {
+ if let Some(parent) = referrer_path.parent() {
+ return file_extension_probe::<Fs>(
+ parent.join(specifier),
+ &referrer_path,
+ );
+ } else {
+ todo!();
+ }
+ }
+
+ // We've got a bare specifier or maybe bare_specifier/blah.js"
+
+ let (package_specifier, package_subpath) =
+ parse_specifier(specifier).unwrap();
+
+ // todo(dsherret): use not_found error on not found here
+ let module_dir = self.npm_resolver.resolve_package_folder_from_package(
+ package_specifier.as_str(),
+ &referrer_path,
+ mode,
+ )?;
+
+ let package_json_path = module_dir.join("package.json");
+ if Fs::exists(&package_json_path) {
+ let package_json = PackageJson::load::<Fs>(
+ &self.npm_resolver,
+ permissions,
+ package_json_path.clone(),
+ )?;
+
+ if let Some(exports) = &package_json.exports {
+ return package_exports_resolve::<Fs>(
+ &package_json_path,
+ package_subpath,
+ exports,
+ referrer,
+ NodeModuleKind::Esm,
+ conditions,
+ mode,
+ &self.npm_resolver,
+ permissions,
+ );
+ }
+
+ // old school
+ if package_subpath != "." {
+ let d = module_dir.join(package_subpath);
+ if Fs::is_dir(&d) {
+ // subdir might have a package.json that specifies the entrypoint
+ let package_json_path = d.join("package.json");
+ if Fs::exists(&package_json_path) {
+ let package_json = PackageJson::load::<Fs>(
+ &self.npm_resolver,
+ permissions,
+ package_json_path,
+ )?;
+ if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
+ return Ok(d.join(main).clean());
+ }
+ }
+
+ return Ok(d.join("index.js").clean());
+ }
+ return file_extension_probe::<Fs>(d, &referrer_path);
+ } else if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
+ return Ok(module_dir.join(main).clean());
+ } else {
+ return Ok(module_dir.join("index.js").clean());
+ }
+ }
+ Err(not_found(specifier, &referrer_path))
+ }
+}
+
+fn esm_code_from_top_level_decls(
+ file_text: &str,
+ top_level_decls: &HashSet<String>,
+) -> String {
+ let mut globals = Vec::with_capacity(NODE_GLOBALS.len());
+ let has_global_this = top_level_decls.contains("globalThis");
+ for global in NODE_GLOBALS.iter() {
+ if !top_level_decls.contains(&global.to_string()) {
+ globals.push(*global);
+ }
+ }
+
+ let mut result = String::new();
+ let global_this_expr = NODE_GLOBAL_THIS_NAME.as_str();
+ let global_this_expr = if has_global_this {
+ global_this_expr
+ } else {
+ write!(result, "var globalThis = {global_this_expr};").unwrap();
+ "globalThis"
+ };
+ for global in globals {
+ write!(result, "var {global} = {global_this_expr}.{global};").unwrap();
+ }
+
+ // strip the shebang
+ let file_text = if file_text.starts_with("#!/") {
+ let start_index = file_text.find('\n').unwrap_or(file_text.len());
+ &file_text[start_index..]
+ } else {
+ file_text
+ };
+ result.push_str(file_text);
+
+ result
+}
+
+static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| {
+ HashSet::from([
+ "break",
+ "case",
+ "catch",
+ "class",
+ "const",
+ "continue",
+ "debugger",
+ "default",
+ "delete",
+ "do",
+ "else",
+ "export",
+ "extends",
+ "false",
+ "finally",
+ "for",
+ "function",
+ "if",
+ "import",
+ "in",
+ "instanceof",
+ "new",
+ "null",
+ "return",
+ "super",
+ "switch",
+ "this",
+ "throw",
+ "true",
+ "try",
+ "typeof",
+ "var",
+ "void",
+ "while",
+ "with",
+ "yield",
+ "let",
+ "enum",
+ "implements",
+ "interface",
+ "package",
+ "private",
+ "protected",
+ "public",
+ "static",
+ ])
+});
+
+fn add_export(
+ source: &mut Vec<String>,
+ name: &str,
+ initializer: &str,
+ temp_var_count: &mut usize,
+) {
+ fn is_valid_var_decl(name: &str) -> bool {
+ // it's ok to be super strict here
+ name
+ .chars()
+ .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$')
+ }
+
+ // TODO(bartlomieju): Node actually checks if a given export exists in `exports` object,
+ // but it might not be necessary here since our analysis is more detailed?
+ if RESERVED_WORDS.contains(name) || !is_valid_var_decl(name) {
+ *temp_var_count += 1;
+ // we can't create an identifier with a reserved word or invalid identifier name,
+ // so assign it to a temporary variable that won't have a conflict, then re-export
+ // it as a string
+ source.push(format!(
+ "const __deno_export_{temp_var_count}__ = {initializer};"
+ ));
+ source.push(format!(
+ "export {{ __deno_export_{temp_var_count}__ as \"{name}\" }};"
+ ));
+ } else {
+ source.push(format!("export const {name} = {initializer};"));
+ }
+}
+
+fn parse_specifier(specifier: &str) -> Option<(String, String)> {
+ let mut separator_index = specifier.find('/');
+ let mut valid_package_name = true;
+ // let mut is_scoped = false;
+ if specifier.is_empty() {
+ valid_package_name = false;
+ } else if specifier.starts_with('@') {
+ // is_scoped = true;
+ if let Some(index) = separator_index {
+ separator_index = specifier[index + 1..].find('/').map(|i| i + index + 1);
+ } else {
+ valid_package_name = false;
+ }
+ }
+
+ let package_name = if let Some(index) = separator_index {
+ specifier[0..index].to_string()
+ } else {
+ specifier.to_string()
+ };
+
+ // Package name cannot have leading . and cannot have percent-encoding or separators.
+ for ch in package_name.chars() {
+ if ch == '%' || ch == '\\' {
+ valid_package_name = false;
+ break;
+ }
+ }
+
+ if !valid_package_name {
+ return None;
+ }
+
+ let package_subpath = if let Some(index) = separator_index {
+ format!(".{}", specifier.chars().skip(index).collect::<String>())
+ } else {
+ ".".to_string()
+ };
+
+ Some((package_name, package_subpath))
+}
+
+fn file_extension_probe<Fs: NodeFs>(
+ p: PathBuf,
+ referrer: &Path,
+) -> Result<PathBuf, AnyError> {
+ let p = p.clean();
+ if Fs::exists(&p) {
+ let file_name = p.file_name().unwrap();
+ let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
+ if Fs::is_file(&p_js) {
+ return Ok(p_js);
+ } else if Fs::is_dir(&p) {
+ return Ok(p.join("index.js"));
+ } else {
+ return Ok(p);
+ }
+ } else if let Some(file_name) = p.file_name() {
+ let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
+ if Fs::is_file(&p_js) {
+ return Ok(p_js);
+ }
+ }
+ Err(not_found(&p.to_string_lossy(), referrer))
+}
+
+fn not_found(path: &str, referrer: &Path) -> AnyError {
+ let msg = format!(
+ "[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"",
+ path,
+ referrer.to_string_lossy()
+ );
+ std::io::Error::new(std::io::ErrorKind::NotFound, msg).into()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_esm_code_with_node_globals() {
+ let r = esm_code_from_top_level_decls(
+ "export const x = 1;",
+ &HashSet::from(["x".to_string()]),
+ );
+ assert!(r.contains(&format!(
+ "var globalThis = {};",
+ NODE_GLOBAL_THIS_NAME.as_str()
+ )));
+ assert!(r.contains("var process = globalThis.process;"));
+ assert!(r.contains("export const x = 1;"));
+ }
+
+ #[test]
+ fn test_esm_code_with_node_globals_with_shebang() {
+ let r = esm_code_from_top_level_decls(
+ "#!/usr/bin/env node\nexport const x = 1;",
+ &HashSet::from(["x".to_string()]),
+ );
+ assert_eq!(
+ r,
+ format!(
+ concat!(
+ "var globalThis = {}",
+ ";var Buffer = globalThis.Buffer;",
+ "var clearImmediate = globalThis.clearImmediate;var clearInterval = globalThis.clearInterval;",
+ "var clearTimeout = globalThis.clearTimeout;var console = globalThis.console;",
+ "var global = globalThis.global;var process = globalThis.process;",
+ "var setImmediate = globalThis.setImmediate;var setInterval = globalThis.setInterval;",
+ "var setTimeout = globalThis.setTimeout;\n",
+ "export const x = 1;"
+ ),
+ NODE_GLOBAL_THIS_NAME.as_str(),
+ )
+ );
+ }
+
+ #[test]
+ fn test_add_export() {
+ let mut temp_var_count = 0;
+ let mut source = vec![];
+
+ let exports = vec!["static", "server", "app", "dashed-export"];
+ for export in exports {
+ add_export(&mut source, export, "init", &mut temp_var_count);
+ }
+ assert_eq!(
+ source,
+ vec![
+ "const __deno_export_1__ = init;".to_string(),
+ "export { __deno_export_1__ as \"static\" };".to_string(),
+ "export const server = init;".to_string(),
+ "export const app = init;".to_string(),
+ "const __deno_export_2__ = init;".to_string(),
+ "export { __deno_export_2__ as \"dashed-export\" };".to_string(),
+ ]
+ )
+ }
+
+ #[test]
+ fn test_parse_specifier() {
+ assert_eq!(
+ parse_specifier("@some-package/core/actions"),
+ Some(("@some-package/core".to_string(), "./actions".to_string()))
+ );
+ }
+}