diff options
Diffstat (limited to 'resolvers/node/analyze.rs')
-rw-r--r-- | resolvers/node/analyze.rs | 654 |
1 file changed, 654 insertions, 0 deletions
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

//! Translation of CommonJS modules into ESM wrapper modules.
//!
//! The entry point is [`NodeCodeTranslator::translate_cjs_to_esm`], which
//! statically analyzes a CJS file (via a pluggable [`CjsCodeAnalyzer`]) and
//! emits ESM source that `require()`s the original module and re-exports its
//! discovered named exports, following re-exports recursively.

use std::borrow::Cow;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;

use futures::future::LocalBoxFuture;
use futures::stream::FuturesUnordered;
use futures::FutureExt;
use futures::StreamExt;
use once_cell::sync::Lazy;

use anyhow::Context;
use anyhow::Error as AnyError;
use url::Url;

use crate::env::NodeResolverEnv;
use crate::package_json::load_pkg_json;
use crate::path::to_file_specifier;
use crate::resolution::NodeResolverRc;
use crate::NodeModuleKind;
use crate::NodeResolutionMode;
use crate::NpmResolverRc;
use crate::PathClean;

/// Outcome of analyzing a file that was expected to be CommonJS.
#[derive(Debug, Clone)]
pub enum CjsAnalysis {
  /// File was found to be an ES module and the translator should
  /// load the code as ESM.
  Esm(String),
  Cjs(CjsAnalysisExports),
}

/// Named exports and re-export specifiers discovered in a CJS file.
#[derive(Debug, Clone)]
pub struct CjsAnalysisExports {
  /// Names exported from the module (may include "default").
  pub exports: Vec<String>,
  /// Specifiers the module re-exports from (e.g. `module.exports = require(...)`).
  pub reexports: Vec<String>,
}

/// Code analyzer for CJS and ESM files.
#[async_trait::async_trait(?Send)]
pub trait CjsCodeAnalyzer {
  /// Analyzes CommonJs code for exports and reexports, which is
  /// then used to determine the wrapper ESM module exports.
  ///
  /// Note that the source is provided by the caller when the caller
  /// already has it. If the source is needed by the implementation,
  /// then it can use the provided source, or otherwise load it if
  /// necessary.
  async fn analyze_cjs(
    &self,
    specifier: &Url,
    maybe_source: Option<String>,
  ) -> Result<CjsAnalysis, AnyError>;
}

/// Translates CJS modules into ESM wrapper modules, using a pluggable
/// [`CjsCodeAnalyzer`] for static analysis and the node/npm resolvers
/// for resolving re-exported specifiers.
pub struct NodeCodeTranslator<
  TCjsCodeAnalyzer: CjsCodeAnalyzer,
  TNodeResolverEnv: NodeResolverEnv,
> {
  cjs_code_analyzer: TCjsCodeAnalyzer,
  env: TNodeResolverEnv,
  node_resolver: NodeResolverRc<TNodeResolverEnv>,
  npm_resolver: NpmResolverRc,
}

impl<TCjsCodeAnalyzer: CjsCodeAnalyzer, TNodeResolverEnv: NodeResolverEnv>
  NodeCodeTranslator<TCjsCodeAnalyzer, TNodeResolverEnv>
{
  pub fn new(
    cjs_code_analyzer: TCjsCodeAnalyzer,
    env: TNodeResolverEnv,
    node_resolver: NodeResolverRc<TNodeResolverEnv>,
    npm_resolver: NpmResolverRc,
  ) -> Self {
    Self {
      cjs_code_analyzer,
      env,
      node_resolver,
      npm_resolver,
    }
  }

  /// Translates given CJS module into ESM. This function will perform static
  /// analysis on the file to find defined exports and reexports.
  ///
  /// For all discovered reexports the analysis will be performed recursively.
  ///
  /// If successful a source code for equivalent ES module is returned.
  pub async fn translate_cjs_to_esm(
    &self,
    entry_specifier: &Url,
    source: Option<String>,
  ) -> Result<String, AnyError> {
    let mut temp_var_count = 0;

    let analysis = self
      .cjs_code_analyzer
      .analyze_cjs(entry_specifier, source)
      .await?;

    // if the analyzer determined the file is actually ESM, hand the
    // source back untranslated
    let analysis = match analysis {
      CjsAnalysis::Esm(source) => return Ok(source),
      CjsAnalysis::Cjs(analysis) => analysis,
    };

    // the generated wrapper loads the real module via a CJS `require`
    // created from this wrapper module's own URL
    let mut source = vec![
      r#"import {createRequire as __internalCreateRequire} from "node:module";
const require = __internalCreateRequire(import.meta.url);"#
        .to_string(),
    ];

    // use a BTreeSet to make the output deterministic for v8's code cache
    let mut all_exports = analysis.exports.into_iter().collect::<BTreeSet<_>>();

    if !analysis.reexports.is_empty() {
      let mut errors = Vec::new();
      self
        .analyze_reexports(
          entry_specifier,
          analysis.reexports,
          &mut all_exports,
          &mut errors,
        )
        .await;

      // surface errors afterwards in a deterministic way
      if !errors.is_empty() {
        errors.sort_by_cached_key(|e| e.to_string());
        return Err(errors.remove(0));
      }
    }

    // escape the file path for embedding in a double-quoted JS string
    source.push(format!(
      "const mod = require(\"{}\");",
      entry_specifier
        .to_file_path()
        .unwrap()
        .to_str()
        .unwrap()
        .replace('\\', "\\\\")
        .replace('\'', "\\\'")
        .replace('\"', "\\\"")
    ));

    // re-export every discovered name; "default" is handled separately below
    for export in &all_exports {
      if export.as_str() != "default" {
        add_export(
          &mut source,
          export,
          &format!("mod[\"{}\"]", escape_for_double_quote_string(export)),
          &mut temp_var_count,
        );
      }
    }

    source.push("export default mod;".to_string());

    let translated_source = source.join("\n");
    Ok(translated_source)
  }

  /// Recursively analyzes `reexports` (concurrently via a
  /// [`FuturesUnordered`]), accumulating discovered export names into
  /// `all_exports` and any failures into `errors`.
  async fn analyze_reexports<'a>(
    &'a self,
    entry_specifier: &url::Url,
    reexports: Vec<String>,
    all_exports: &mut BTreeSet<String>,
    // this goes through the modules concurrently, so collect
    // the errors in order to be deterministic
    errors: &mut Vec<anyhow::Error>,
  ) {
    struct Analysis {
      reexport_specifier: url::Url,
      referrer: url::Url,
      analysis: CjsAnalysis,
    }

    type AnalysisFuture<'a> = LocalBoxFuture<'a, Result<Analysis, AnyError>>;

    // seed with the entry so a module re-exporting the entry doesn't recurse
    let mut handled_reexports: HashSet<Url> = HashSet::default();
    handled_reexports.insert(entry_specifier.clone());
    let mut analyze_futures: FuturesUnordered<AnalysisFuture<'a>> =
      FuturesUnordered::new();
    let cjs_code_analyzer = &self.cjs_code_analyzer;
    let mut handle_reexports =
      |referrer: url::Url,
       reexports: Vec<String>,
       analyze_futures: &mut FuturesUnordered<AnalysisFuture<'a>>,
       errors: &mut Vec<anyhow::Error>| {
        // 1. Resolve the re-exports and start a future to analyze each one
        for reexport in reexports {
          let result = self.resolve(
            &reexport,
            &referrer,
            // FIXME(bartlomieju): check if these conditions are okay, probably
            // should be `deno-require`, because `deno` is already used in `esm_resolver.rs`
            &["deno", "node", "require", "default"],
            NodeResolutionMode::Execution,
          );
          let reexport_specifier = match result {
            Ok(Some(specifier)) => specifier,
            // Ok(None) means "package not found" — skip silently
            Ok(None) => continue,
            Err(err) => {
              errors.push(err);
              continue;
            }
          };

          // skip specifiers we've already queued or finished
          if !handled_reexports.insert(reexport_specifier.clone()) {
            continue;
          }

          let referrer = referrer.clone();
          let future = async move {
            let analysis = cjs_code_analyzer
              .analyze_cjs(&reexport_specifier, None)
              .await
              .with_context(|| {
                format!(
                  "Could not load '{}' ({}) referenced from {}",
                  reexport, reexport_specifier, referrer
                )
              })?;

            Ok(Analysis {
              reexport_specifier,
              referrer,
              analysis,
            })
          }
          .boxed_local();
          analyze_futures.push(future);
        }
      };

    handle_reexports(
      entry_specifier.clone(),
      reexports,
      &mut analyze_futures,
      errors,
    );

    while let Some(analysis_result) = analyze_futures.next().await {
      // 2. Look at the analysis result and resolve its exports and re-exports
      let Analysis {
        reexport_specifier,
        referrer,
        analysis,
      } = match analysis_result {
        Ok(analysis) => analysis,
        Err(err) => {
          errors.push(err);
          continue;
        }
      };
      match analysis {
        CjsAnalysis::Esm(_) => {
          // todo(dsherret): support this once supporting requiring ES modules
          errors.push(anyhow::anyhow!(
            "Cannot require ES module '{}' from '{}'",
            reexport_specifier,
            referrer,
          ));
        }
        CjsAnalysis::Cjs(analysis) => {
          // queue this module's own re-exports for analysis too
          if !analysis.reexports.is_empty() {
            handle_reexports(
              reexport_specifier.clone(),
              analysis.reexports,
              &mut analyze_futures,
              errors,
            );
          }

          all_exports.extend(
            analysis
              .exports
              .into_iter()
              .filter(|e| e.as_str() != "default"),
          );
        }
      }
    }
  }

  /// Resolves `specifier` against `referrer` using CJS-style rules:
  /// relative paths are extension-probed, bare specifiers go through the
  /// npm resolver + package.json `exports`/`main`, with a final fallback
  /// walking ancestor `node_modules` directories. Returns `Ok(None)` when
  /// the package is not found. Absolute specifiers are not yet handled
  /// (`todo!`).
  // todo(dsherret): what is going on here? Isn't this a bunch of duplicate code?
  fn resolve(
    &self,
    specifier: &str,
    referrer: &Url,
    conditions: &[&str],
    mode: NodeResolutionMode,
  ) -> Result<Option<Url>, AnyError> {
    if specifier.starts_with('/') {
      todo!();
    }

    let referrer_path = referrer.to_file_path().unwrap();
    if specifier.starts_with("./") || specifier.starts_with("../") {
      if let Some(parent) = referrer_path.parent() {
        return Some(
          self
            .file_extension_probe(parent.join(specifier), &referrer_path)
            .map(|p| to_file_specifier(&p)),
        )
        .transpose();
      } else {
        todo!();
      }
    }

    // We've got a bare specifier or maybe bare_specifier/blah.js"
    let (package_specifier, package_subpath) =
      parse_specifier(specifier).unwrap();

    let module_dir = match self
      .npm_resolver
      .resolve_package_folder_from_package(package_specifier.as_str(), referrer)
    {
      // a missing package is reported as Ok(None) rather than an error
      Err(err)
        if matches!(
          err.as_kind(),
          crate::errors::PackageFolderResolveErrorKind::PackageNotFound(..)
        ) =>
      {
        return Ok(None);
      }
      other => other,
    }?;

    let package_json_path = module_dir.join("package.json");
    let maybe_package_json =
      load_pkg_json(self.env.pkg_json_fs(), &package_json_path)?;
    if let Some(package_json) = maybe_package_json {
      // modern packages: resolve through the "exports" field
      if let Some(exports) = &package_json.exports {
        return Some(
          self
            .node_resolver
            .package_exports_resolve(
              &package_json_path,
              &package_subpath,
              exports,
              Some(referrer),
              NodeModuleKind::Esm,
              conditions,
              mode,
            )
            .map_err(AnyError::from),
        )
        .transpose();
      }

      // old school
      if package_subpath != "." {
        let d = module_dir.join(package_subpath);
        if self.env.is_dir_sync(&d) {
          // subdir might have a package.json that specifies the entrypoint
          let package_json_path = d.join("package.json");
          let maybe_package_json =
            load_pkg_json(self.env.pkg_json_fs(), &package_json_path)?;
          if let Some(package_json) = maybe_package_json {
            if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
              return Ok(Some(to_file_specifier(&d.join(main).clean())));
            }
          }

          return Ok(Some(to_file_specifier(&d.join("index.js").clean())));
        }
        return Some(
          self
            .file_extension_probe(d, &referrer_path)
            .map(|p| to_file_specifier(&p)),
        )
        .transpose();
      } else if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
        return Ok(Some(to_file_specifier(&module_dir.join(main).clean())));
      } else {
        return Ok(Some(to_file_specifier(
          &module_dir.join("index.js").clean(),
        )));
      }
    }

    // as a fallback, attempt to resolve it via the ancestor directories
    let mut last = referrer_path.as_path();
    while let Some(parent) = last.parent() {
      if !self.npm_resolver.in_npm_package_at_dir_path(parent) {
        break;
      }
      let path = if parent.ends_with("node_modules") {
        parent.join(specifier)
      } else {
        parent.join("node_modules").join(specifier)
      };
      if let Ok(path) = self.file_extension_probe(path, &referrer_path) {
        return Ok(Some(to_file_specifier(&path)));
      }
      last = parent;
    }

    Err(not_found(specifier, &referrer_path))
  }

  /// Probes for a file at `p`, mirroring Node's CJS file/extension lookup:
  /// tries `<p>.js` first, then `p` itself (or `p/index.js` when `p` is a
  /// directory), then `<p>.json`. Errors with a module-not-found error when
  /// nothing exists.
  fn file_extension_probe(
    &self,
    p: PathBuf,
    referrer: &Path,
  ) -> Result<PathBuf, AnyError> {
    let p = p.clean();
    if self.env.exists_sync(&p) {
      let file_name = p.file_name().unwrap();
      let p_js =
        p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
      // note: a sibling `<name>.js` file takes precedence over `p` itself
      if self.env.is_file_sync(&p_js) {
        return Ok(p_js);
      } else if self.env.is_dir_sync(&p) {
        return Ok(p.join("index.js"));
      } else {
        return Ok(p);
      }
    } else if let Some(file_name) = p.file_name() {
      {
        let p_js =
          p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
        if self.env.is_file_sync(&p_js) {
          return Ok(p_js);
        }
      }
      {
        let p_json =
          p.with_file_name(format!("{}.json", file_name.to_str().unwrap()));
        if self.env.is_file_sync(&p_json) {
          return Ok(p_json);
        }
      }
    }
    Err(not_found(&p.to_string_lossy(), referrer))
  }
}

// Names that can't be used as plain `export const <name>` identifiers in the
// generated ESM source (JS reserved/contextual words plus a few extras such
// as `mod`, which the wrapper itself uses).
static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| {
  HashSet::from([
    "abstract",
    "arguments",
    "async",
    "await",
    "boolean",
    "break",
    "byte",
    "case",
    "catch",
    "char",
    "class",
    "const",
    "continue",
    "debugger",
    "default",
    "delete",
    "do",
    "double",
    "else",
    "enum",
    "eval",
    "export",
    "extends",
    "false",
    "final",
    "finally",
    "float",
    "for",
    "function",
    "get",
    "goto",
    "if",
    "implements",
    "import",
    "in",
    "instanceof",
    "int",
    "interface",
    "let",
    "long",
    "mod",
    "native",
    "new",
    "null",
    "package",
    "private",
    "protected",
    "public",
    "return",
    "set",
    "short",
    "static",
    "super",
    "switch",
    "synchronized",
    "this",
    "throw",
    "throws",
    "transient",
    "true",
    "try",
    "typeof",
    "var",
    "void",
    "volatile",
    "while",
    "with",
    "yield",
  ])
});

/// Appends an `export` statement for `name` (initialized to `initializer`)
/// to the generated `source` lines. Names that are reserved words or not
/// valid identifiers are routed through a numbered temp variable and
/// re-exported as a string.
fn add_export(
  source: &mut Vec<String>,
  name: &str,
  initializer: &str,
  temp_var_count: &mut usize,
) {
  fn is_valid_var_decl(name: &str) -> bool {
    // it's ok to be super strict here
    if name.is_empty() {
      return false;
    }

    if let Some(first) = name.chars().next() {
      if !first.is_ascii_alphabetic() && first != '_' && first != '$' {
        return false;
      }
    }

    name
      .chars()
      .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$')
  }

  // TODO(bartlomieju): Node actually checks if a given export exists in `exports` object,
  // but it might not be necessary here since our analysis is more detailed?
  if RESERVED_WORDS.contains(name) || !is_valid_var_decl(name) {
    *temp_var_count += 1;
    // we can't create an identifier with a reserved word or invalid identifier name,
    // so assign it to a temporary variable that won't have a conflict, then re-export
    // it as a string
    source.push(format!(
      "const __deno_export_{temp_var_count}__ = {initializer};"
    ));
    source.push(format!(
      "export {{ __deno_export_{temp_var_count}__ as \"{}\" }};",
      escape_for_double_quote_string(name)
    ));
  } else {
    source.push(format!("export const {name} = {initializer};"));
  }
}

/// Splits a bare specifier into `(package_name, "./subpath")`, handling
/// `@scope/name` specifiers. The subpath is `"."` when none is given.
/// Returns `None` for invalid package names (empty, bare `@scope`, or
/// containing `%`/`\`).
fn parse_specifier(specifier: &str) -> Option<(String, String)> {
  let mut separator_index = specifier.find('/');
  let mut valid_package_name = true;
  // let mut is_scoped = false;
  if specifier.is_empty() {
    valid_package_name = false;
  } else if specifier.starts_with('@') {
    // is_scoped = true;
    // scoped packages: the package name extends to the second '/'
    if let Some(index) = separator_index {
      separator_index = specifier[index + 1..].find('/').map(|i| i + index + 1);
    } else {
      valid_package_name = false;
    }
  }

  let package_name = if let Some(index) = separator_index {
    specifier[0..index].to_string()
  } else {
    specifier.to_string()
  };

  // Package name cannot have leading . and cannot have percent-encoding or separators.
  for ch in package_name.chars() {
    if ch == '%' || ch == '\\' {
      valid_package_name = false;
      break;
    }
  }

  if !valid_package_name {
    return None;
  }

  let package_subpath = if let Some(index) = separator_index {
    format!(".{}", specifier.chars().skip(index).collect::<String>())
  } else {
    ".".to_string()
  };

  Some((package_name, package_subpath))
}

/// Builds a Node-style `ERR_MODULE_NOT_FOUND` error wrapped in an
/// `io::Error` with `ErrorKind::NotFound`.
fn not_found(path: &str, referrer: &Path) -> AnyError {
  let msg = format!(
    "[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"",
    path,
    referrer.to_string_lossy()
  );
  std::io::Error::new(std::io::ErrorKind::NotFound, msg).into()
}

/// Escapes backslashes and double quotes so `text` can be embedded in a
/// double-quoted JS string literal; borrows when no escaping is needed.
fn escape_for_double_quote_string(text: &str) -> Cow<str> {
  // this should be rare, so doing a scan first before allocating is ok
  if text.chars().any(|c| matches!(c, '"' | '\\')) {
    // don't bother making this more complex for perf because it's rare
    Cow::Owned(text.replace('\\', "\\\\").replace('"', "\\\""))
  } else {
    Cow::Borrowed(text)
  }
}

#[cfg(test)]
mod tests {
  use super::*;

  #[test]
  fn test_add_export() {
    let mut temp_var_count = 0;
    let mut source = vec![];

    let exports = vec!["static", "server", "app", "dashed-export", "3d"];
    for export in exports {
      add_export(&mut source, export, "init", &mut temp_var_count);
    }
    assert_eq!(
      source,
      vec![
        "const __deno_export_1__ = init;".to_string(),
        "export { __deno_export_1__ as \"static\" };".to_string(),
        "export const server = init;".to_string(),
        "export const app = init;".to_string(),
        "const __deno_export_2__ = init;".to_string(),
        "export { __deno_export_2__ as \"dashed-export\" };".to_string(),
        "const __deno_export_3__ = init;".to_string(),
        "export { __deno_export_3__ as \"3d\" };".to_string(),
      ]
    )
  }

  #[test]
  fn test_parse_specifier() {
    assert_eq!(
      parse_specifier("@some-package/core/actions"),
      Some(("@some-package/core".to_string(), "./actions".to_string()))
    );
  }
}