diff options
-rw-r--r-- | cli/cache/node.rs | 8 | ||||
-rw-r--r-- | cli/module_loader.rs | 15 | ||||
-rw-r--r-- | cli/node/analyze.rs | 632 | ||||
-rw-r--r-- | cli/node/mod.rs | 6 | ||||
-rw-r--r-- | cli/npm/mod.rs | 2 | ||||
-rw-r--r-- | cli/npm/resolvers/mod.rs | 10 | ||||
-rw-r--r-- | cli/proc_state.rs | 13 | ||||
-rw-r--r-- | ext/node/analyze.rs | 564 | ||||
-rw-r--r-- | ext/node/crypto/x509.rs | 6 | ||||
-rw-r--r-- | ext/node/lib.rs | 1 |
10 files changed, 654 insertions, 603 deletions
diff --git a/cli/cache/node.rs b/cli/cache/node.rs index f42f132fd..298d81e2f 100644 --- a/cli/cache/node.rs +++ b/cli/cache/node.rs @@ -42,14 +42,6 @@ pub struct NodeAnalysisCache { } impl NodeAnalysisCache { - #[cfg(test)] - pub fn new_in_memory() -> Self { - Self::new(CacheDB::in_memory( - &NODE_ANALYSIS_CACHE_DB, - crate::version::deno(), - )) - } - pub fn new(db: CacheDB) -> Self { Self { inner: NodeAnalysisCacheInner::new(db), diff --git a/cli/module_loader.rs b/cli/module_loader.rs index 06755bbf5..07fad6ffc 100644 --- a/cli/module_loader.rs +++ b/cli/module_loader.rs @@ -11,9 +11,10 @@ use crate::graph_util::graph_valid_with_cli_options; use crate::graph_util::ModuleGraphBuilder; use crate::graph_util::ModuleGraphContainer; use crate::node; +use crate::node::CliCjsEsmCodeAnalyzer; use crate::node::CliNodeResolver; -use crate::node::NodeCodeTranslator; use crate::node::NodeResolution; +use crate::npm::CliRequireNpmResolver; use crate::proc_state::CjsResolutionStore; use crate::proc_state::FileWatcherReporter; use crate::proc_state::ProcState; @@ -49,7 +50,9 @@ use deno_graph::JsonModule; use deno_graph::Module; use deno_graph::Resolution; use deno_lockfile::Lockfile; +use deno_runtime::deno_node::analyze::NodeCodeTranslator; use deno_runtime::deno_node::NodeResolutionMode; +use deno_runtime::deno_node::RealFs; use deno_runtime::permissions::PermissionsContainer; use deno_semver::npm::NpmPackageReqReference; use std::borrow::Cow; @@ -241,7 +244,8 @@ pub struct CliModuleLoader { emitter: Arc<Emitter>, graph_container: Arc<ModuleGraphContainer>, module_load_preparer: Arc<ModuleLoadPreparer>, - node_code_translator: Arc<NodeCodeTranslator>, + node_code_translator: + Arc<NodeCodeTranslator<CliCjsEsmCodeAnalyzer, CliRequireNpmResolver>>, node_resolver: Arc<CliNodeResolver>, parsed_source_cache: Arc<ParsedSourceCache>, resolver: Arc<CliGraphResolver>, @@ -385,17 +389,16 @@ impl CliModuleLoader { self.root_permissions.clone() }; // translate cjs to esm if it's cjs and inject node globals - self.node_code_translator.translate_cjs_to_esm( + self.node_code_translator.translate_cjs_to_esm::<RealFs>( specifier, - code, - MediaType::Cjs, + &code, &mut permissions, )? } else { // only inject node globals for esm self .node_code_translator - .esm_code_with_node_globals(specifier, code)? + .esm_code_with_node_globals(specifier, &code)? }; ModuleCodeSource { code: code.into(), diff --git a/cli/node/analyze.rs b/cli/node/analyze.rs index f93e9fa91..27818639e 100644 --- a/cli/node/analyze.rs +++ b/cli/node/analyze.rs @@ -1,11 +1,6 @@ // Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. use std::collections::HashSet; -use std::collections::VecDeque; -use std::fmt::Write; -use std::path::Path; -use std::path::PathBuf; -use std::sync::Arc; use deno_ast::swc::common::SyntaxContext; use deno_ast::view::Node; @@ -15,195 +10,35 @@ use deno_ast::MediaType; use deno_ast::ModuleSpecifier; use deno_ast::ParsedSource; use deno_ast::SourceRanged; -use deno_core::anyhow::anyhow; use deno_core::error::AnyError; -use deno_runtime::deno_node::package_exports_resolve; -use deno_runtime::deno_node::NodeModuleKind; -use deno_runtime::deno_node::NodePermissions; -use deno_runtime::deno_node::NodeResolutionMode; -use deno_runtime::deno_node::PackageJson; -use deno_runtime::deno_node::PathClean; -use deno_runtime::deno_node::RealFs; -use deno_runtime::deno_node::RequireNpmResolver; -use deno_runtime::deno_node::NODE_GLOBAL_THIS_NAME; -use once_cell::sync::Lazy; +use deno_runtime::deno_node::analyze::CjsAnalysis as ExtNodeCjsAnalysis; +use deno_runtime::deno_node::analyze::CjsEsmCodeAnalyzer; use crate::cache::NodeAnalysisCache; -use crate::file_fetcher::FileFetcher; -use crate::npm::NpmPackageResolver; -static NODE_GLOBALS: &[&str] = &[ - "Buffer", - "clearImmediate", - "clearInterval", - "clearTimeout", - "console", - "global", - "process", - "setImmediate", - "setInterval", - "setTimeout", -]; - -pub struct NodeCodeTranslator { - analysis_cache: NodeAnalysisCache, - file_fetcher: Arc<FileFetcher>, - npm_resolver: Arc<NpmPackageResolver>, +pub struct CliCjsEsmCodeAnalyzer { + cache: NodeAnalysisCache, } -impl NodeCodeTranslator { - pub fn new( - analysis_cache: NodeAnalysisCache, - file_fetcher: Arc<FileFetcher>, - npm_resolver: Arc<NpmPackageResolver>, - ) -> Self { - Self { - analysis_cache, - file_fetcher, - npm_resolver, - } +impl CliCjsEsmCodeAnalyzer { + pub fn new(cache: NodeAnalysisCache) -> Self { + Self { cache } } - pub fn esm_code_with_node_globals( + fn inner_cjs_analysis( &self, specifier: &ModuleSpecifier, - code: String, - ) -> Result<String, AnyError> { - esm_code_with_node_globals(&self.analysis_cache, specifier, code) - } - - /// Translates given CJS module into ESM. This function will perform static - /// analysis on the file to find defined exports and reexports. - /// - /// For all discovered reexports the analysis will be performed recursively. - /// - /// If successful a source code for equivalent ES module is returned. - pub fn translate_cjs_to_esm( - &self, - specifier: &ModuleSpecifier, - code: String, - media_type: MediaType, - permissions: &mut dyn NodePermissions, - ) -> Result<String, AnyError> { - let mut temp_var_count = 0; - let mut handled_reexports: HashSet<String> = HashSet::default(); - - let mut source = vec![ - r#"import {createRequire as __internalCreateRequire} from "node:module"; - const require = __internalCreateRequire(import.meta.url);"# - .to_string(), - ]; - - let analysis = - self.perform_cjs_analysis(specifier.as_str(), media_type, code)?; - - let mut all_exports = analysis - .exports - .iter() - .map(|s| s.to_string()) - .collect::<HashSet<_>>(); - - // (request, referrer) - let mut reexports_to_handle = VecDeque::new(); - for reexport in analysis.reexports { - reexports_to_handle.push_back((reexport, specifier.clone())); - } - - while let Some((reexport, referrer)) = reexports_to_handle.pop_front() { - if handled_reexports.contains(&reexport) { - continue; - } - - handled_reexports.insert(reexport.to_string()); - - // First, resolve relate reexport specifier - let resolved_reexport = self.resolve( - &reexport, - &referrer, - // FIXME(bartlomieju): check if these conditions are okay, probably - // should be `deno-require`, because `deno` is already used in `esm_resolver.rs` - &["deno", "require", "default"], - NodeResolutionMode::Execution, - permissions, - )?; - let reexport_specifier = - ModuleSpecifier::from_file_path(resolved_reexport).unwrap(); - // Second, read the source code from disk - let reexport_file = self - .file_fetcher - .get_source(&reexport_specifier) - .ok_or_else(|| { - anyhow!( - "Could not find '{}' ({}) referenced from {}", - reexport, - reexport_specifier, - referrer - ) - })?; - - { - let analysis = self.perform_cjs_analysis( - reexport_specifier.as_str(), - reexport_file.media_type, - reexport_file.source.to_string(), - )?; - - for reexport in analysis.reexports { - reexports_to_handle.push_back((reexport, reexport_specifier.clone())); - } - - all_exports.extend( - analysis - .exports - .into_iter() - .filter(|e| e.as_str() != "default"), - ); - } - } - - source.push(format!( - "const mod = require(\"{}\");", - specifier - .to_file_path() - .unwrap() - .to_str() - .unwrap() - .replace('\\', "\\\\") - .replace('\'', "\\\'") - .replace('\"', "\\\"") - )); - - for export in &all_exports { - if export.as_str() != "default" { - add_export( - &mut source, - export, - &format!("mod[\"{export}\"]"), - &mut temp_var_count, - ); - } - } - - source.push("export default mod;".to_string()); - - let translated_source = source.join("\n"); - Ok(translated_source) - } - - fn perform_cjs_analysis( - &self, - specifier: &str, - media_type: MediaType, - code: String, + source: &str, ) -> Result<CjsAnalysis, AnyError> { - let source_hash = NodeAnalysisCache::compute_source_hash(&code); + let source_hash = NodeAnalysisCache::compute_source_hash(source); if let Some(analysis) = self - .analysis_cache - .get_cjs_analysis(specifier, &source_hash) + .cache + .get_cjs_analysis(specifier.as_str(), &source_hash) { return Ok(analysis); } + let media_type = MediaType::from_specifier(specifier); if media_type == MediaType::Json { return Ok(CjsAnalysis { exports: vec![], @@ -213,7 +48,7 @@ impl NodeCodeTranslator { let parsed_source = deno_ast::parse_script(deno_ast::ParseParams { specifier: specifier.to_string(), - text_info: deno_ast::SourceTextInfo::new(code.into()), + text_info: deno_ast::SourceTextInfo::new(source.into()), media_type, capture_tokens: true, scope_analysis: false, @@ -221,175 +56,61 @@ impl NodeCodeTranslator { })?; let analysis = parsed_source.analyze_cjs(); self - .analysis_cache - .set_cjs_analysis(specifier, &source_hash, &analysis); + .cache + .set_cjs_analysis(specifier.as_str(), &source_hash, &analysis); Ok(analysis) } +} - fn resolve( +impl CjsEsmCodeAnalyzer for CliCjsEsmCodeAnalyzer { + fn analyze_cjs( &self, - specifier: &str, - referrer: &ModuleSpecifier, - conditions: &[&str], - mode: NodeResolutionMode, - permissions: &mut dyn NodePermissions, - ) -> Result<PathBuf, AnyError> { - if specifier.starts_with('/') { - todo!(); - } - - let referrer_path = referrer.to_file_path().unwrap(); - if specifier.starts_with("./") || specifier.starts_with("../") { - if let Some(parent) = referrer_path.parent() { - return file_extension_probe(parent.join(specifier), &referrer_path); - } else { - todo!(); - } - } - - // We've got a bare specifier or maybe bare_specifier/blah.js" - - let (package_specifier, package_subpath) = - parse_specifier(specifier).unwrap(); - - // todo(dsherret): use not_found error on not found here - let resolver = self.npm_resolver.as_require_npm_resolver(); - let module_dir = resolver.resolve_package_folder_from_package( - package_specifier.as_str(), - &referrer_path, - mode, - )?; - - let package_json_path = module_dir.join("package.json"); - if package_json_path.exists() { - let package_json = PackageJson::load::<RealFs>( - &self.npm_resolver.as_require_npm_resolver(), - permissions, - package_json_path.clone(), - )?; - - if let Some(exports) = &package_json.exports { - return package_exports_resolve::<RealFs>( - &package_json_path, - package_subpath, - exports, - referrer, - NodeModuleKind::Esm, - conditions, - mode, - &self.npm_resolver.as_require_npm_resolver(), - permissions, - ); - } - - // old school - if package_subpath != "." { - let d = module_dir.join(package_subpath); - if let Ok(m) = d.metadata() { - if m.is_dir() { - // subdir might have a package.json that specifies the entrypoint - let package_json_path = d.join("package.json"); - if package_json_path.exists() { - let package_json = PackageJson::load::<RealFs>( - &self.npm_resolver.as_require_npm_resolver(), - permissions, - package_json_path, - )?; - if let Some(main) = package_json.main(NodeModuleKind::Cjs) { - return Ok(d.join(main).clean()); - } - } - - return Ok(d.join("index.js").clean()); - } - } - return file_extension_probe(d, &referrer_path); - } else if let Some(main) = package_json.main(NodeModuleKind::Cjs) { - return Ok(module_dir.join(main).clean()); - } else { - return Ok(module_dir.join("index.js").clean()); - } - } - Err(not_found(specifier, &referrer_path)) + specifier: &ModuleSpecifier, + source: &str, + ) -> Result<ExtNodeCjsAnalysis, AnyError> { + let analysis = self.inner_cjs_analysis(specifier, source)?; + Ok(ExtNodeCjsAnalysis { + exports: analysis.exports, + reexports: analysis.reexports, + }) } -} - -fn esm_code_with_node_globals( - analysis_cache: &NodeAnalysisCache, - specifier: &ModuleSpecifier, - code: String, -) -> Result<String, AnyError> { - // TODO(dsherret): this code is way more inefficient than it needs to be. - // - // In the future, we should disable capturing tokens & scope analysis - // and instead only use swc's APIs to go through the portions of the tree - // that we know will affect the global scope while still ensuring that - // `var` decls are taken into consideration. - let source_hash = NodeAnalysisCache::compute_source_hash(&code); - let text_info = deno_ast::SourceTextInfo::from_string(code); - let top_level_decls = if let Some(decls) = - analysis_cache.get_esm_analysis(specifier.as_str(), &source_hash) - { - HashSet::from_iter(decls) - } else { - let parsed_source = deno_ast::parse_program(deno_ast::ParseParams { - specifier: specifier.to_string(), - text_info: text_info.clone(), - media_type: deno_ast::MediaType::from_specifier(specifier), - capture_tokens: true, - scope_analysis: true, - maybe_syntax: None, - })?; - let top_level_decls = analyze_top_level_decls(&parsed_source)?; - analysis_cache.set_esm_analysis( - specifier.as_str(), - &source_hash, - &top_level_decls.clone().into_iter().collect(), - ); - top_level_decls - }; - - Ok(esm_code_from_top_level_decls( - text_info.text_str(), - &top_level_decls, - )) -} -fn esm_code_from_top_level_decls( - file_text: &str, - top_level_decls: &HashSet<String>, -) -> String { - let mut globals = Vec::with_capacity(NODE_GLOBALS.len()); - let has_global_this = top_level_decls.contains("globalThis"); - for global in NODE_GLOBALS.iter() { - if !top_level_decls.contains(&global.to_string()) { - globals.push(*global); + fn analyze_esm_top_level_decls( + &self, + specifier: &ModuleSpecifier, + source: &str, + ) -> Result<HashSet<String>, AnyError> { + // TODO(dsherret): this code is way more inefficient than it needs to be. + // + // In the future, we should disable capturing tokens & scope analysis + // and instead only use swc's APIs to go through the portions of the tree + // that we know will affect the global scope while still ensuring that + // `var` decls are taken into consideration. + let source_hash = NodeAnalysisCache::compute_source_hash(source); + if let Some(decls) = self + .cache + .get_esm_analysis(specifier.as_str(), &source_hash) + { + Ok(HashSet::from_iter(decls)) + } else { + let parsed_source = deno_ast::parse_program(deno_ast::ParseParams { + specifier: specifier.to_string(), + text_info: deno_ast::SourceTextInfo::from_string(source.to_string()), + media_type: deno_ast::MediaType::from_specifier(specifier), + capture_tokens: true, + scope_analysis: true, + maybe_syntax: None, + })?; + let top_level_decls = analyze_top_level_decls(&parsed_source)?; + self.cache.set_esm_analysis( + specifier.as_str(), + &source_hash, + &top_level_decls.clone().into_iter().collect::<Vec<_>>(), + ); + Ok(top_level_decls) } } - - let mut result = String::new(); - let global_this_expr = NODE_GLOBAL_THIS_NAME.as_str(); - let global_this_expr = if has_global_this { - global_this_expr - } else { - write!(result, "var globalThis = {global_this_expr};").unwrap(); - "globalThis" - }; - for global in globals { - write!(result, "var {global} = {global_this_expr}.{global};").unwrap(); - } - - // strip the shebang - let file_text = if file_text.starts_with("#!/") { - let start_index = file_text.find('\n').unwrap_or(file_text.len()); - &file_text[start_index..] - } else { - file_text - }; - result.push_str(file_text); - - result } fn analyze_top_level_decls( @@ -455,236 +176,3 @@ fn is_local_declaration_ident(node: Node) -> bool { false } } - -static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| { - HashSet::from([ - "break", - "case", - "catch", - "class", - "const", - "continue", - "debugger", - "default", - "delete", - "do", - "else", - "export", - "extends", - "false", - "finally", - "for", - "function", - "if", - "import", - "in", - "instanceof", - "new", - "null", - "return", - "super", - "switch", - "this", - "throw", - "true", - "try", - "typeof", - "var", - "void", - "while", - "with", - "yield", - "let", - "enum", - "implements", - "interface", - "package", - "private", - "protected", - "public", - "static", - ]) -}); - -fn add_export( - source: &mut Vec<String>, - name: &str, - initializer: &str, - temp_var_count: &mut usize, -) { - fn is_valid_var_decl(name: &str) -> bool { - // it's ok to be super strict here - name - .chars() - .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$') - } - - // TODO(bartlomieju): Node actually checks if a given export exists in `exports` object, - // but it might not be necessary here since our analysis is more detailed? - if RESERVED_WORDS.contains(name) || !is_valid_var_decl(name) { - *temp_var_count += 1; - // we can't create an identifier with a reserved word or invalid identifier name, - // so assign it to a temporary variable that won't have a conflict, then re-export - // it as a string - source.push(format!( - "const __deno_export_{temp_var_count}__ = {initializer};" - )); - source.push(format!( - "export {{ __deno_export_{temp_var_count}__ as \"{name}\" }};" - )); - } else { - source.push(format!("export const {name} = {initializer};")); - } -} - -fn parse_specifier(specifier: &str) -> Option<(String, String)> { - let mut separator_index = specifier.find('/'); - let mut valid_package_name = true; - // let mut is_scoped = false; - if specifier.is_empty() { - valid_package_name = false; - } else if specifier.starts_with('@') { - // is_scoped = true; - if let Some(index) = separator_index { - separator_index = specifier[index + 1..].find('/').map(|i| i + index + 1); - } else { - valid_package_name = false; - } - } - - let package_name = if let Some(index) = separator_index { - specifier[0..index].to_string() - } else { - specifier.to_string() - }; - - // Package name cannot have leading . and cannot have percent-encoding or separators. - for ch in package_name.chars() { - if ch == '%' || ch == '\\' { - valid_package_name = false; - break; - } - } - - if !valid_package_name { - return None; - } - - let package_subpath = if let Some(index) = separator_index { - format!(".{}", specifier.chars().skip(index).collect::<String>()) - } else { - ".".to_string() - }; - - Some((package_name, package_subpath)) -} - -fn file_extension_probe( - p: PathBuf, - referrer: &Path, -) -> Result<PathBuf, AnyError> { - let p = p.clean(); - if p.exists() { - let file_name = p.file_name().unwrap(); - let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap())); - if p_js.exists() && p_js.is_file() { - return Ok(p_js); - } else if p.is_dir() { - return Ok(p.join("index.js")); - } else { - return Ok(p); - } - } else if let Some(file_name) = p.file_name() { - let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap())); - if p_js.exists() && p_js.is_file() { - return Ok(p_js); - } - } - Err(not_found(&p.to_string_lossy(), referrer)) -} - -fn not_found(path: &str, referrer: &Path) -> AnyError { - let msg = format!( - "[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"", - path, - referrer.to_string_lossy() - ); - std::io::Error::new(std::io::ErrorKind::NotFound, msg).into() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_esm_code_with_node_globals() { - let r = esm_code_with_node_globals( - &NodeAnalysisCache::new_in_memory(), - &ModuleSpecifier::parse("https://example.com/foo/bar.js").unwrap(), - "export const x = 1;".to_string(), - ) - .unwrap(); - assert!(r.contains(&format!( - "var globalThis = {};", - NODE_GLOBAL_THIS_NAME.as_str() - ))); - assert!(r.contains("var process = globalThis.process;")); - assert!(r.contains("export const x = 1;")); - } - - #[test] - fn test_esm_code_with_node_globals_with_shebang() { - let r = esm_code_with_node_globals( - &NodeAnalysisCache::new_in_memory(), - &ModuleSpecifier::parse("https://example.com/foo/bar.js").unwrap(), - "#!/usr/bin/env node\nexport const x = 1;".to_string(), - ) - .unwrap(); - assert_eq!( - r, - format!( - concat!( - "var globalThis = {}", - ";var Buffer = globalThis.Buffer;", - "var clearImmediate = globalThis.clearImmediate;var clearInterval = globalThis.clearInterval;", - "var clearTimeout = globalThis.clearTimeout;var console = globalThis.console;", - "var global = globalThis.global;var process = globalThis.process;", - "var setImmediate = globalThis.setImmediate;var setInterval = globalThis.setInterval;", - "var setTimeout = globalThis.setTimeout;\n", - "export const x = 1;" - ), - NODE_GLOBAL_THIS_NAME.as_str(), - ) - ); - } - - #[test] - fn test_add_export() { - let mut temp_var_count = 0; - let mut source = vec![]; - - let exports = vec!["static", "server", "app", "dashed-export"]; - for export in exports { - add_export(&mut source, export, "init", &mut temp_var_count); - } - assert_eq!( - source, - vec![ - "const __deno_export_1__ = init;".to_string(), - "export { __deno_export_1__ as \"static\" };".to_string(), - "export const server = init;".to_string(), - "export const app = init;".to_string(), - "const __deno_export_2__ = init;".to_string(), - "export { __deno_export_2__ as \"dashed-export\" };".to_string(), - ] - ) - } - - #[test] - fn test_parse_specifier() { - assert_eq!( - parse_specifier("@some-package/core/actions"), - Some(("@some-package/core".to_string(), "./actions".to_string())) - ); - } -} diff --git a/cli/node/mod.rs b/cli/node/mod.rs index 01216f50c..eb584879e 100644 --- a/cli/node/mod.rs +++ b/cli/node/mod.rs @@ -33,14 +33,14 @@ use deno_semver::npm::NpmPackageNv; use deno_semver::npm::NpmPackageNvReference; use deno_semver::npm::NpmPackageReqReference; +use crate::npm::CliRequireNpmResolver; use crate::npm::NpmPackageResolver; use crate::npm::NpmResolution; -use crate::npm::RequireNpmPackageResolver; use crate::util::fs::canonicalize_path_maybe_not_exists; mod analyze; -pub use analyze::NodeCodeTranslator; +pub use analyze::CliCjsEsmCodeAnalyzer; #[derive(Debug)] pub enum NodeResolution { @@ -116,7 +116,7 @@ pub fn resolve_builtin_node_module(module_name: &str) -> Result<Url, AnyError> { pub struct CliNodeResolver { npm_resolution: Arc<NpmResolution>, npm_resolver: Arc<NpmPackageResolver>, - require_npm_resolver: RequireNpmPackageResolver, + require_npm_resolver: CliRequireNpmResolver, } impl CliNodeResolver { diff --git a/cli/npm/mod.rs b/cli/npm/mod.rs index 8a38ee079..8f6ac77bc 100644 --- a/cli/npm/mod.rs +++ b/cli/npm/mod.rs @@ -13,6 +13,6 @@ pub use installer::PackageJsonDepsInstaller; pub use registry::CliNpmRegistryApi; pub use resolution::NpmResolution; pub use resolvers::create_npm_fs_resolver; +pub use resolvers::CliRequireNpmResolver; pub use resolvers::NpmPackageResolver; pub use resolvers::NpmProcessState; -pub use resolvers::RequireNpmPackageResolver; diff --git a/cli/npm/resolvers/mod.rs b/cli/npm/resolvers/mod.rs index 5d3eb52b0..f693d3d23 100644 --- a/cli/npm/resolvers/mod.rs +++ b/cli/npm/resolvers/mod.rs @@ -229,17 +229,15 @@ impl NpmPackageResolver { Ok(()) } - pub fn as_require_npm_resolver( - self: &Arc<Self>, - ) -> RequireNpmPackageResolver { - RequireNpmPackageResolver(self.clone()) + pub fn as_require_npm_resolver(self: &Arc<Self>) -> CliRequireNpmResolver { + CliRequireNpmResolver(self.clone()) } } #[derive(Debug)] -pub struct RequireNpmPackageResolver(Arc<NpmPackageResolver>); +pub struct CliRequireNpmResolver(Arc<NpmPackageResolver>); -impl RequireNpmResolver for RequireNpmPackageResolver { +impl RequireNpmResolver for CliRequireNpmResolver { fn resolve_package_folder_from_package( &self, specifier: &str, diff --git a/cli/proc_state.rs b/cli/proc_state.rs index 75466e5fe..bfe45bc86 100644 --- a/cli/proc_state.rs +++ b/cli/proc_state.rs @@ -17,10 +17,11 @@ use crate::graph_util::ModuleGraphBuilder; use crate::graph_util::ModuleGraphContainer; use crate::http_util::HttpClient; use crate::module_loader::ModuleLoadPreparer; +use crate::node::CliCjsEsmCodeAnalyzer; use crate::node::CliNodeResolver; -use crate::node::NodeCodeTranslator; use crate::npm::create_npm_fs_resolver; use crate::npm::CliNpmRegistryApi; +use crate::npm::CliRequireNpmResolver; use crate::npm::NpmCache; use crate::npm::NpmPackageResolver; use crate::npm::NpmResolution; @@ -37,6 +38,7 @@ use deno_core::ModuleSpecifier; use deno_core::SharedArrayBufferStore; use deno_runtime::deno_broadcast_channel::InMemoryBroadcastChannel; +use deno_runtime::deno_node::analyze::NodeCodeTranslator; use deno_runtime::deno_tls::rustls::RootCertStore; use deno_runtime::deno_web::BlobStore; use deno_runtime::inspector_server::InspectorServer; @@ -75,7 +77,8 @@ pub struct Inner { maybe_file_watcher_reporter: Option<FileWatcherReporter>, pub module_graph_builder: Arc<ModuleGraphBuilder>, pub module_load_preparer: Arc<ModuleLoadPreparer>, - pub node_code_translator: Arc<NodeCodeTranslator>, + pub node_code_translator: + Arc<NodeCodeTranslator<CliCjsEsmCodeAnalyzer, CliRequireNpmResolver>>, pub node_resolver: Arc<CliNodeResolver>, pub npm_api: Arc<CliNpmRegistryApi>, pub npm_cache: Arc<NpmCache>, @@ -304,10 +307,10 @@ impl ProcState { let file_fetcher = Arc::new(file_fetcher); let node_analysis_cache = NodeAnalysisCache::new(caches.node_analysis_db(&dir)); + let cjs_esm_analyzer = CliCjsEsmCodeAnalyzer::new(node_analysis_cache); let node_code_translator = Arc::new(NodeCodeTranslator::new( - node_analysis_cache, - file_fetcher.clone(), - npm_resolver.clone(), + cjs_esm_analyzer, + npm_resolver.as_require_npm_resolver(), )); let node_resolver = Arc::new(CliNodeResolver::new( npm_resolution.clone(), diff --git a/ext/node/analyze.rs b/ext/node/analyze.rs new file mode 100644 index 000000000..03bf41995 --- /dev/null +++ b/ext/node/analyze.rs @@ -0,0 +1,564 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::collections::HashSet; +use std::collections::VecDeque; +use std::fmt::Write; +use std::path::Path; +use std::path::PathBuf; + +use deno_core::anyhow::Context; +use deno_core::ModuleSpecifier; +use once_cell::sync::Lazy; + +use deno_core::error::AnyError; + +use crate::package_exports_resolve; +use crate::NodeFs; +use crate::NodeModuleKind; +use crate::NodePermissions; +use crate::NodeResolutionMode; +use crate::PackageJson; +use crate::PathClean; +use crate::RequireNpmResolver; +use crate::NODE_GLOBAL_THIS_NAME; + +static NODE_GLOBALS: &[&str] = &[ + "Buffer", + "clearImmediate", + "clearInterval", + "clearTimeout", + "console", + "global", + "process", + "setImmediate", + "setInterval", + "setTimeout", +]; + +#[derive(Debug, Clone)] +pub struct CjsAnalysis { + pub exports: Vec<String>, + pub reexports: Vec<String>, +} + +/// Code analyzer for CJS and ESM files. +pub trait CjsEsmCodeAnalyzer { + /// Analyzes CommonJs code for exports and reexports, which is + /// then used to determine the wrapper ESM module exports. + fn analyze_cjs( + &self, + specifier: &ModuleSpecifier, + source: &str, + ) -> Result<CjsAnalysis, AnyError>; + + /// Analyzes ESM code for top level declarations. This is used + /// to help inform injecting node specific globals into Node ESM + /// code. For example, if a top level `setTimeout` function exists + /// then we don't want to inject a `setTimeout` declaration. + /// + /// Note: This will go away in the future once we do this all in v8. + fn analyze_esm_top_level_decls( + &self, + specifier: &ModuleSpecifier, + source: &str, + ) -> Result<HashSet<String>, AnyError>; +} + +pub struct NodeCodeTranslator< + TCjsEsmCodeAnalyzer: CjsEsmCodeAnalyzer, + TRequireNpmResolver: RequireNpmResolver, +> { + cjs_esm_code_analyzer: TCjsEsmCodeAnalyzer, + npm_resolver: TRequireNpmResolver, +} + +impl< + TCjsEsmCodeAnalyzer: CjsEsmCodeAnalyzer, + TRequireNpmResolver: RequireNpmResolver, + > NodeCodeTranslator<TCjsEsmCodeAnalyzer, TRequireNpmResolver> +{ + pub fn new( + cjs_esm_code_analyzer: TCjsEsmCodeAnalyzer, + npm_resolver: TRequireNpmResolver, + ) -> Self { + Self { + cjs_esm_code_analyzer, + npm_resolver, + } + } + + /// Resolves the code to be used when executing Node specific ESM code. + /// + /// Note: This will go away in the future once we do this all in v8. + pub fn esm_code_with_node_globals( + &self, + specifier: &ModuleSpecifier, + source: &str, + ) -> Result<String, AnyError> { + let top_level_decls = self + .cjs_esm_code_analyzer + .analyze_esm_top_level_decls(specifier, source)?; + Ok(esm_code_from_top_level_decls(source, &top_level_decls)) + } + + /// Translates given CJS module into ESM. This function will perform static + /// analysis on the file to find defined exports and reexports. + /// + /// For all discovered reexports the analysis will be performed recursively. + /// + /// If successful a source code for equivalent ES module is returned. + pub fn translate_cjs_to_esm<Fs: NodeFs>( + &self, + specifier: &ModuleSpecifier, + source: &str, + permissions: &mut dyn NodePermissions, + ) -> Result<String, AnyError> { + let mut temp_var_count = 0; + let mut handled_reexports: HashSet<String> = HashSet::default(); + + let analysis = self.cjs_esm_code_analyzer.analyze_cjs(specifier, source)?; + + let mut source = vec![ + r#"import {createRequire as __internalCreateRequire} from "node:module"; + const require = __internalCreateRequire(import.meta.url);"# + .to_string(), + ]; + + let mut all_exports = analysis + .exports + .iter() + .map(|s| s.to_string()) + .collect::<HashSet<_>>(); + + // (request, referrer) + let mut reexports_to_handle = VecDeque::new(); + for reexport in analysis.reexports { + reexports_to_handle.push_back((reexport, specifier.clone())); + } + + while let Some((reexport, referrer)) = reexports_to_handle.pop_front() { + if handled_reexports.contains(&reexport) { + continue; + } + + handled_reexports.insert(reexport.to_string()); + + // First, resolve relate reexport specifier + let resolved_reexport = self.resolve::<Fs>( + &reexport, + &referrer, + // FIXME(bartlomieju): check if these conditions are okay, probably + // should be `deno-require`, because `deno` is already used in `esm_resolver.rs` + &["deno", "require", "default"], + NodeResolutionMode::Execution, + permissions, + )?; + // Second, read the source code from disk + let reexport_specifier = + ModuleSpecifier::from_file_path(&resolved_reexport).unwrap(); + let reexport_file_text = Fs::read_to_string(&resolved_reexport) + .with_context(|| { + format!( + "Could not find '{}' ({}) referenced from {}", + reexport, reexport_specifier, referrer + ) + })?; + { + let analysis = self + .cjs_esm_code_analyzer + .analyze_cjs(&reexport_specifier, &reexport_file_text)?; + + for reexport in analysis.reexports { + reexports_to_handle.push_back((reexport, reexport_specifier.clone())); + } + + all_exports.extend( + analysis + .exports + .into_iter() + .filter(|e| e.as_str() != "default"), + ); + } + } + + source.push(format!( + "const mod = require(\"{}\");", + specifier + .to_file_path() + .unwrap() + .to_str() + .unwrap() + .replace('\\', "\\\\") + .replace('\'', "\\\'") + .replace('\"', "\\\"") + )); + + for export in &all_exports { + if export.as_str() != "default" { + add_export( + &mut source, + export, + &format!("mod[\"{export}\"]"), + &mut temp_var_count, + ); + } + } + + source.push("export default mod;".to_string()); + + let translated_source = source.join("\n"); + Ok(translated_source) + } + + fn resolve<Fs: NodeFs>( + &self, + specifier: &str, + referrer: &ModuleSpecifier, + conditions: &[&str], + mode: NodeResolutionMode, + permissions: &mut dyn NodePermissions, + ) -> Result<PathBuf, AnyError> { + if specifier.starts_with('/') { + todo!(); + } + + let referrer_path = referrer.to_file_path().unwrap(); + if specifier.starts_with("./") || specifier.starts_with("../") { + if let Some(parent) = referrer_path.parent() { + return file_extension_probe::<Fs>( + parent.join(specifier), + &referrer_path, + ); + } else { + todo!(); + } + } + + // We've got a bare specifier or maybe bare_specifier/blah.js" + + let (package_specifier, package_subpath) = + parse_specifier(specifier).unwrap(); + + // todo(dsherret): use not_found error on not found here + let module_dir = self.npm_resolver.resolve_package_folder_from_package( + package_specifier.as_str(), + &referrer_path, + mode, + )?; + + let package_json_path = module_dir.join("package.json"); + if Fs::exists(&package_json_path) { + let package_json = PackageJson::load::<Fs>( + &self.npm_resolver, + permissions, + package_json_path.clone(), + )?; + + if let Some(exports) = &package_json.exports { + return package_exports_resolve::<Fs>( + &package_json_path, + package_subpath, + exports, + referrer, + NodeModuleKind::Esm, + conditions, + mode, + &self.npm_resolver, + permissions, + ); + } + + // old school + if package_subpath != "." { + let d = module_dir.join(package_subpath); + if Fs::is_dir(&d) { + // subdir might have a package.json that specifies the entrypoint + let package_json_path = d.join("package.json"); + if Fs::exists(&package_json_path) { + let package_json = PackageJson::load::<Fs>( + &self.npm_resolver, + permissions, + package_json_path, + )?; + if let Some(main) = package_json.main(NodeModuleKind::Cjs) { + return Ok(d.join(main).clean()); + } + } + + return Ok(d.join("index.js").clean()); + } + return file_extension_probe::<Fs>(d, &referrer_path); + } else if let Some(main) = package_json.main(NodeModuleKind::Cjs) { + return Ok(module_dir.join(main).clean()); + } else { + return Ok(module_dir.join("index.js").clean()); + } + } + Err(not_found(specifier, &referrer_path)) + } +} + +fn esm_code_from_top_level_decls( + file_text: &str, + top_level_decls: &HashSet<String>, +) -> String { + let mut globals = Vec::with_capacity(NODE_GLOBALS.len()); + let has_global_this = top_level_decls.contains("globalThis"); + for global in NODE_GLOBALS.iter() { + if !top_level_decls.contains(&global.to_string()) { + globals.push(*global); + } + } + + let mut result = String::new(); + let global_this_expr = NODE_GLOBAL_THIS_NAME.as_str(); + let global_this_expr = if has_global_this { + global_this_expr + } else { + write!(result, "var globalThis = {global_this_expr};").unwrap(); + "globalThis" + }; + for global in globals { + write!(result, "var {global} = {global_this_expr}.{global};").unwrap(); + } + + // strip the shebang + let file_text = if file_text.starts_with("#!/") { + let start_index = file_text.find('\n').unwrap_or(file_text.len()); + &file_text[start_index..] + } else { + file_text + }; + result.push_str(file_text); + + result +} + +static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| { + HashSet::from([ + "break", + "case", + "catch", + "class", + "const", + "continue", + "debugger", + "default", + "delete", + "do", + "else", + "export", + "extends", + "false", + "finally", + "for", + "function", + "if", + "import", + "in", + "instanceof", + "new", + "null", + "return", + "super", + "switch", + "this", + "throw", + "true", + "try", + "typeof", + "var", + "void", + "while", + "with", + "yield", + "let", + "enum", + "implements", + "interface", + "package", + "private", + "protected", + "public", + "static", + ]) +}); + +fn add_export( + source: &mut Vec<String>, + name: &str, + initializer: &str, + temp_var_count: &mut usize, +) { + fn is_valid_var_decl(name: &str) -> bool { + // it's ok to be super strict here + name + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$') + } + + // TODO(bartlomieju): Node actually checks if a given export exists in `exports` object, + // but it might not be necessary here since our analysis is more detailed? + if RESERVED_WORDS.contains(name) || !is_valid_var_decl(name) { + *temp_var_count += 1; + // we can't create an identifier with a reserved word or invalid identifier name, + // so assign it to a temporary variable that won't have a conflict, then re-export + // it as a string + source.push(format!( + "const __deno_export_{temp_var_count}__ = {initializer};" + )); + source.push(format!( + "export {{ __deno_export_{temp_var_count}__ as \"{name}\" }};" + )); + } else { + source.push(format!("export const {name} = {initializer};")); + } +} + +fn parse_specifier(specifier: &str) -> Option<(String, String)> { + let mut separator_index = specifier.find('/'); + let mut valid_package_name = true; + // let mut is_scoped = false; + if specifier.is_empty() { + valid_package_name = false; + } else if specifier.starts_with('@') { + // is_scoped = true; + if let Some(index) = separator_index { + separator_index = specifier[index + 1..].find('/').map(|i| i + index + 1); + } else { + valid_package_name = false; + } + } + + let package_name = if let Some(index) = separator_index { + specifier[0..index].to_string() + } else { + specifier.to_string() + }; + + // Package name cannot have leading . and cannot have percent-encoding or separators. + for ch in package_name.chars() { + if ch == '%' || ch == '\\' { + valid_package_name = false; + break; + } + } + + if !valid_package_name { + return None; + } + + let package_subpath = if let Some(index) = separator_index { + format!(".{}", specifier.chars().skip(index).collect::<String>()) + } else { + ".".to_string() + }; + + Some((package_name, package_subpath)) +} + +fn file_extension_probe<Fs: NodeFs>( + p: PathBuf, + referrer: &Path, +) -> Result<PathBuf, AnyError> { + let p = p.clean(); + if Fs::exists(&p) { + let file_name = p.file_name().unwrap(); + let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap())); + if Fs::is_file(&p_js) { + return Ok(p_js); + } else if Fs::is_dir(&p) { + return Ok(p.join("index.js")); + } else { + return Ok(p); + } + } else if let Some(file_name) = p.file_name() { + let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap())); + if Fs::is_file(&p_js) { + return Ok(p_js); + } + } + Err(not_found(&p.to_string_lossy(), referrer)) +} + +fn not_found(path: &str, referrer: &Path) -> AnyError { + let msg = format!( + "[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"", + path, + referrer.to_string_lossy() + ); + std::io::Error::new(std::io::ErrorKind::NotFound, msg).into() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_esm_code_with_node_globals() { + let r = esm_code_from_top_level_decls( + "export const x = 1;", + &HashSet::from(["x".to_string()]), + ); + assert!(r.contains(&format!( + "var globalThis = {};", + NODE_GLOBAL_THIS_NAME.as_str() + ))); + assert!(r.contains("var process = globalThis.process;")); + assert!(r.contains("export const x = 1;")); + } + + #[test] + fn test_esm_code_with_node_globals_with_shebang() { + let r = esm_code_from_top_level_decls( + "#!/usr/bin/env node\nexport const x = 1;", + &HashSet::from(["x".to_string()]), + ); + assert_eq!( + r, + format!( + concat!( + "var globalThis = {}", + ";var Buffer = globalThis.Buffer;", + "var clearImmediate = globalThis.clearImmediate;var clearInterval = globalThis.clearInterval;", + "var clearTimeout = globalThis.clearTimeout;var console = globalThis.console;", + "var global = globalThis.global;var process = globalThis.process;", + "var setImmediate = globalThis.setImmediate;var setInterval = globalThis.setInterval;", + "var setTimeout = globalThis.setTimeout;\n", + "export const x = 1;" + ), + NODE_GLOBAL_THIS_NAME.as_str(), + ) + ); + } + + #[test] + fn test_add_export() { + let mut temp_var_count = 0; + let mut source = vec![]; + + let exports = vec!["static", "server", "app", "dashed-export"]; + for export in exports { + add_export(&mut source, export, "init", &mut temp_var_count); + } + assert_eq!( + source, + vec![ + "const __deno_export_1__ = init;".to_string(), + "export { __deno_export_1__ as \"static\" };".to_string(), + "export const server = init;".to_string(), + "export const app = init;".to_string(), + "const __deno_export_2__ = init;".to_string(), + "export { __deno_export_2__ as \"dashed-export\" };".to_string(), + ] + ) + } + + #[test] + fn test_parse_specifier() { + assert_eq!( + parse_specifier("@some-package/core/actions"), + Some(("@some-package/core".to_string(), "./actions".to_string())) + ); + } +} diff --git a/ext/node/crypto/x509.rs b/ext/node/crypto/x509.rs index 776103e1e..402c58b72 100644 --- a/ext/node/crypto/x509.rs +++ b/ext/node/crypto/x509.rs @@ -228,6 +228,8 @@ fn x509name_to_string( name: &X509Name, oid_registry: &oid_registry::OidRegistry, ) -> Result<String, x509_parser::error::X509Error> { + // Lifted from https://github.com/rusticata/x509-parser/blob/4d618c2ed6b1fc102df16797545895f7c67ee0fe/src/x509.rs#L543-L566 + // since it's a private function (Copyright 2017 Pierre Chifflier) name.iter_rdn().fold(Ok(String::new()), |acc, rdn| { acc.and_then(|mut _vec| { rdn @@ -244,13 +246,13 @@ fn x509name_to_string( let rdn = format!("{}={}", abbrev, val_str); match _vec2.len() { 0 => Ok(rdn), - _ => Ok(_vec2 + " + " + &rdn), + _ => Ok(_vec2 + " + " + rdn.as_str()), } }) }) .map(|v| match _vec.len() { 0 => v, - _ => _vec + "\n" + &v, + _ => _vec + "\n" + v.as_str(), }) }) }) diff --git a/ext/node/lib.rs b/ext/node/lib.rs index 65db6e45f..a521e161c 100644 --- a/ext/node/lib.rs +++ b/ext/node/lib.rs @@ -12,6 +12,7 @@ use std::path::Path; use std::path::PathBuf; use std::rc::Rc; +pub mod analyze; mod crypto; pub mod errors; mod idna; |