summaryrefslogtreecommitdiff
path: root/cli/node/analyze.rs
diff options
context:
space:
mode:
authorDavid Sherret <dsherret@users.noreply.github.com>2023-04-14 16:22:33 -0400
committerGitHub <noreply@github.com>2023-04-14 16:22:33 -0400
commit136dce67cec749dce5989ea29e88359ef79a0045 (patch)
tree38e96bbbf22dc06cdba418a35467b215f1335549 /cli/node/analyze.rs
parenta4111442191fff300132259752e6d2d5613d1871 (diff)
refactor: break up `ProcState` (#18707)
1. Breaks up functionality within `ProcState` into several other structs to break out the responsibilities (`ProcState` is only a data struct now). 2. Moves towards being able to inject dependencies more easily and have functionality only require what it needs. 3. Exposes `Arc<T>` around the "service structs" instead of it being embedded within them. The idea behind embedding them was to reduce the verbosity of needing to pass around `Arc<...>`, but I don't think it was exactly working and as we move more of these structs to be more injectable I don't think the extra verbosity will be a big deal.
Diffstat (limited to 'cli/node/analyze.rs')
-rw-r--r--cli/node/analyze.rs534
1 files changed, 508 insertions, 26 deletions
diff --git a/cli/node/analyze.rs b/cli/node/analyze.rs
index 4040c5a2b..f93e9fa91 100644
--- a/cli/node/analyze.rs
+++ b/cli/node/analyze.rs
@@ -1,18 +1,36 @@
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
use std::collections::HashSet;
+use std::collections::VecDeque;
+use std::fmt::Write;
+use std::path::Path;
+use std::path::PathBuf;
+use std::sync::Arc;
use deno_ast::swc::common::SyntaxContext;
use deno_ast::view::Node;
use deno_ast::view::NodeTrait;
+use deno_ast::CjsAnalysis;
+use deno_ast::MediaType;
use deno_ast::ModuleSpecifier;
use deno_ast::ParsedSource;
use deno_ast::SourceRanged;
+use deno_core::anyhow::anyhow;
use deno_core::error::AnyError;
+use deno_runtime::deno_node::package_exports_resolve;
+use deno_runtime::deno_node::NodeModuleKind;
+use deno_runtime::deno_node::NodePermissions;
+use deno_runtime::deno_node::NodeResolutionMode;
+use deno_runtime::deno_node::PackageJson;
+use deno_runtime::deno_node::PathClean;
+use deno_runtime::deno_node::RealFs;
+use deno_runtime::deno_node::RequireNpmResolver;
use deno_runtime::deno_node::NODE_GLOBAL_THIS_NAME;
-use std::fmt::Write;
+use once_cell::sync::Lazy;
use crate::cache::NodeAnalysisCache;
+use crate::file_fetcher::FileFetcher;
+use crate::npm::NpmPackageResolver;
static NODE_GLOBALS: &[&str] = &[
"Buffer",
@@ -27,18 +45,287 @@ static NODE_GLOBALS: &[&str] = &[
"setTimeout",
];
-// TODO(dsherret): this code is way more inefficient than it needs to be.
-//
-// In the future, we should disable capturing tokens & scope analysis
-// and instead only use swc's APIs to go through the portions of the tree
-// that we know will affect the global scope while still ensuring that
-// `var` decls are taken into consideration.
+pub struct NodeCodeTranslator {
+ analysis_cache: NodeAnalysisCache,
+ file_fetcher: Arc<FileFetcher>,
+ npm_resolver: Arc<NpmPackageResolver>,
+}
+
+impl NodeCodeTranslator {
+ pub fn new(
+ analysis_cache: NodeAnalysisCache,
+ file_fetcher: Arc<FileFetcher>,
+ npm_resolver: Arc<NpmPackageResolver>,
+ ) -> Self {
+ Self {
+ analysis_cache,
+ file_fetcher,
+ npm_resolver,
+ }
+ }
+
+ pub fn esm_code_with_node_globals(
+ &self,
+ specifier: &ModuleSpecifier,
+ code: String,
+ ) -> Result<String, AnyError> {
+ esm_code_with_node_globals(&self.analysis_cache, specifier, code)
+ }
+
+ /// Translates given CJS module into ESM. This function will perform static
+ /// analysis on the file to find defined exports and reexports.
+ ///
+ /// For all discovered reexports the analysis will be performed recursively.
+ ///
+ /// If successful a source code for equivalent ES module is returned.
+ pub fn translate_cjs_to_esm(
+ &self,
+ specifier: &ModuleSpecifier,
+ code: String,
+ media_type: MediaType,
+ permissions: &mut dyn NodePermissions,
+ ) -> Result<String, AnyError> {
+ let mut temp_var_count = 0;
+ let mut handled_reexports: HashSet<String> = HashSet::default();
+
+ let mut source = vec![
+ r#"import {createRequire as __internalCreateRequire} from "node:module";
+ const require = __internalCreateRequire(import.meta.url);"#
+ .to_string(),
+ ];
+
+ let analysis =
+ self.perform_cjs_analysis(specifier.as_str(), media_type, code)?;
+
+ let mut all_exports = analysis
+ .exports
+ .iter()
+ .map(|s| s.to_string())
+ .collect::<HashSet<_>>();
+
+ // (request, referrer)
+ let mut reexports_to_handle = VecDeque::new();
+ for reexport in analysis.reexports {
+ reexports_to_handle.push_back((reexport, specifier.clone()));
+ }
+
+ while let Some((reexport, referrer)) = reexports_to_handle.pop_front() {
+ if handled_reexports.contains(&reexport) {
+ continue;
+ }
+
+ handled_reexports.insert(reexport.to_string());
+
+ // First, resolve relate reexport specifier
+ let resolved_reexport = self.resolve(
+ &reexport,
+ &referrer,
+ // FIXME(bartlomieju): check if these conditions are okay, probably
+ // should be `deno-require`, because `deno` is already used in `esm_resolver.rs`
+ &["deno", "require", "default"],
+ NodeResolutionMode::Execution,
+ permissions,
+ )?;
+ let reexport_specifier =
+ ModuleSpecifier::from_file_path(resolved_reexport).unwrap();
+ // Second, read the source code from disk
+ let reexport_file = self
+ .file_fetcher
+ .get_source(&reexport_specifier)
+ .ok_or_else(|| {
+ anyhow!(
+ "Could not find '{}' ({}) referenced from {}",
+ reexport,
+ reexport_specifier,
+ referrer
+ )
+ })?;
+
+ {
+ let analysis = self.perform_cjs_analysis(
+ reexport_specifier.as_str(),
+ reexport_file.media_type,
+ reexport_file.source.to_string(),
+ )?;
+
+ for reexport in analysis.reexports {
+ reexports_to_handle.push_back((reexport, reexport_specifier.clone()));
+ }
+
+ all_exports.extend(
+ analysis
+ .exports
+ .into_iter()
+ .filter(|e| e.as_str() != "default"),
+ );
+ }
+ }
+
+ source.push(format!(
+ "const mod = require(\"{}\");",
+ specifier
+ .to_file_path()
+ .unwrap()
+ .to_str()
+ .unwrap()
+ .replace('\\', "\\\\")
+ .replace('\'', "\\\'")
+ .replace('\"', "\\\"")
+ ));
+
+ for export in &all_exports {
+ if export.as_str() != "default" {
+ add_export(
+ &mut source,
+ export,
+ &format!("mod[\"{export}\"]"),
+ &mut temp_var_count,
+ );
+ }
+ }
+
+ source.push("export default mod;".to_string());
+
+ let translated_source = source.join("\n");
+ Ok(translated_source)
+ }
+
+ fn perform_cjs_analysis(
+ &self,
+ specifier: &str,
+ media_type: MediaType,
+ code: String,
+ ) -> Result<CjsAnalysis, AnyError> {
+ let source_hash = NodeAnalysisCache::compute_source_hash(&code);
+ if let Some(analysis) = self
+ .analysis_cache
+ .get_cjs_analysis(specifier, &source_hash)
+ {
+ return Ok(analysis);
+ }
+
+ if media_type == MediaType::Json {
+ return Ok(CjsAnalysis {
+ exports: vec![],
+ reexports: vec![],
+ });
+ }
+
+ let parsed_source = deno_ast::parse_script(deno_ast::ParseParams {
+ specifier: specifier.to_string(),
+ text_info: deno_ast::SourceTextInfo::new(code.into()),
+ media_type,
+ capture_tokens: true,
+ scope_analysis: false,
+ maybe_syntax: None,
+ })?;
+ let analysis = parsed_source.analyze_cjs();
+ self
+ .analysis_cache
+ .set_cjs_analysis(specifier, &source_hash, &analysis);
+
+ Ok(analysis)
+ }
+
+ fn resolve(
+ &self,
+ specifier: &str,
+ referrer: &ModuleSpecifier,
+ conditions: &[&str],
+ mode: NodeResolutionMode,
+ permissions: &mut dyn NodePermissions,
+ ) -> Result<PathBuf, AnyError> {
+ if specifier.starts_with('/') {
+ todo!();
+ }
+
+ let referrer_path = referrer.to_file_path().unwrap();
+ if specifier.starts_with("./") || specifier.starts_with("../") {
+ if let Some(parent) = referrer_path.parent() {
+ return file_extension_probe(parent.join(specifier), &referrer_path);
+ } else {
+ todo!();
+ }
+ }
+
+ // We've got a bare specifier or maybe bare_specifier/blah.js"
+
+ let (package_specifier, package_subpath) =
+ parse_specifier(specifier).unwrap();
+
+ // todo(dsherret): use not_found error on not found here
+ let resolver = self.npm_resolver.as_require_npm_resolver();
+ let module_dir = resolver.resolve_package_folder_from_package(
+ package_specifier.as_str(),
+ &referrer_path,
+ mode,
+ )?;
+
+ let package_json_path = module_dir.join("package.json");
+ if package_json_path.exists() {
+ let package_json = PackageJson::load::<RealFs>(
+ &self.npm_resolver.as_require_npm_resolver(),
+ permissions,
+ package_json_path.clone(),
+ )?;
+
+ if let Some(exports) = &package_json.exports {
+ return package_exports_resolve::<RealFs>(
+ &package_json_path,
+ package_subpath,
+ exports,
+ referrer,
+ NodeModuleKind::Esm,
+ conditions,
+ mode,
+ &self.npm_resolver.as_require_npm_resolver(),
+ permissions,
+ );
+ }
+
+ // old school
+ if package_subpath != "." {
+ let d = module_dir.join(package_subpath);
+ if let Ok(m) = d.metadata() {
+ if m.is_dir() {
+ // subdir might have a package.json that specifies the entrypoint
+ let package_json_path = d.join("package.json");
+ if package_json_path.exists() {
+ let package_json = PackageJson::load::<RealFs>(
+ &self.npm_resolver.as_require_npm_resolver(),
+ permissions,
+ package_json_path,
+ )?;
+ if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
+ return Ok(d.join(main).clean());
+ }
+ }
+
+ return Ok(d.join("index.js").clean());
+ }
+ }
+ return file_extension_probe(d, &referrer_path);
+ } else if let Some(main) = package_json.main(NodeModuleKind::Cjs) {
+ return Ok(module_dir.join(main).clean());
+ } else {
+ return Ok(module_dir.join("index.js").clean());
+ }
+ }
+ Err(not_found(specifier, &referrer_path))
+ }
+}
-pub fn esm_code_with_node_globals(
+fn esm_code_with_node_globals(
analysis_cache: &NodeAnalysisCache,
specifier: &ModuleSpecifier,
code: String,
) -> Result<String, AnyError> {
+ // TODO(dsherret): this code is way more inefficient than it needs to be.
+ //
+ // In the future, we should disable capturing tokens & scope analysis
+ // and instead only use swc's APIs to go through the portions of the tree
+ // that we know will affect the global scope while still ensuring that
+ // `var` decls are taken into consideration.
let source_hash = NodeAnalysisCache::compute_source_hash(&code);
let text_info = deno_ast::SourceTextInfo::from_string(code);
let top_level_decls = if let Some(decls) =
@@ -63,6 +350,16 @@ pub fn esm_code_with_node_globals(
top_level_decls
};
+ Ok(esm_code_from_top_level_decls(
+ text_info.text_str(),
+ &top_level_decls,
+ ))
+}
+
+fn esm_code_from_top_level_decls(
+ file_text: &str,
+ top_level_decls: &HashSet<String>,
+) -> String {
let mut globals = Vec::with_capacity(NODE_GLOBALS.len());
let has_global_this = top_level_decls.contains("globalThis");
for global in NODE_GLOBALS.iter() {
@@ -83,7 +380,6 @@ pub fn esm_code_with_node_globals(
write!(result, "var {global} = {global_this_expr}.{global};").unwrap();
}
- let file_text = text_info.text_str();
// strip the shebang
let file_text = if file_text.starts_with("#!/") {
let start_index = file_text.find('\n').unwrap_or(file_text.len());
@@ -93,12 +389,28 @@ pub fn esm_code_with_node_globals(
};
result.push_str(file_text);
- Ok(result)
+ result
}
fn analyze_top_level_decls(
parsed_source: &ParsedSource,
) -> Result<HashSet<String>, AnyError> {
+ fn visit_children(
+ node: Node,
+ top_level_context: SyntaxContext,
+ results: &mut HashSet<String>,
+ ) {
+ if let Node::Ident(ident) = node {
+ if ident.ctxt() == top_level_context && is_local_declaration_ident(node) {
+ results.insert(ident.sym().to_string());
+ }
+ }
+
+ for child in node.children() {
+ visit_children(child, top_level_context, results);
+ }
+ }
+
let top_level_context = parsed_source.top_level_context();
parsed_source.with_view(|program| {
@@ -108,22 +420,6 @@ fn analyze_top_level_decls(
})
}
-fn visit_children(
- node: Node,
- top_level_context: SyntaxContext,
- results: &mut HashSet<String>,
-) {
- if let Node::Ident(ident) = node {
- if ident.ctxt() == top_level_context && is_local_declaration_ident(node) {
- results.insert(ident.sym().to_string());
- }
- }
-
- for child in node.children() {
- visit_children(child, top_level_context, results);
- }
-}
-
fn is_local_declaration_ident(node: Node) -> bool {
if let Some(parent) = node.parent() {
match parent {
@@ -160,6 +456,162 @@ fn is_local_declaration_ident(node: Node) -> bool {
}
}
+static RESERVED_WORDS: Lazy<HashSet<&str>> = Lazy::new(|| {
+ HashSet::from([
+ "break",
+ "case",
+ "catch",
+ "class",
+ "const",
+ "continue",
+ "debugger",
+ "default",
+ "delete",
+ "do",
+ "else",
+ "export",
+ "extends",
+ "false",
+ "finally",
+ "for",
+ "function",
+ "if",
+ "import",
+ "in",
+ "instanceof",
+ "new",
+ "null",
+ "return",
+ "super",
+ "switch",
+ "this",
+ "throw",
+ "true",
+ "try",
+ "typeof",
+ "var",
+ "void",
+ "while",
+ "with",
+ "yield",
+ "let",
+ "enum",
+ "implements",
+ "interface",
+ "package",
+ "private",
+ "protected",
+ "public",
+ "static",
+ ])
+});
+
+fn add_export(
+ source: &mut Vec<String>,
+ name: &str,
+ initializer: &str,
+ temp_var_count: &mut usize,
+) {
+ fn is_valid_var_decl(name: &str) -> bool {
+ // it's ok to be super strict here
+ name
+ .chars()
+ .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '$')
+ }
+
+ // TODO(bartlomieju): Node actually checks if a given export exists in `exports` object,
+ // but it might not be necessary here since our analysis is more detailed?
+ if RESERVED_WORDS.contains(name) || !is_valid_var_decl(name) {
+ *temp_var_count += 1;
+ // we can't create an identifier with a reserved word or invalid identifier name,
+ // so assign it to a temporary variable that won't have a conflict, then re-export
+ // it as a string
+ source.push(format!(
+ "const __deno_export_{temp_var_count}__ = {initializer};"
+ ));
+ source.push(format!(
+ "export {{ __deno_export_{temp_var_count}__ as \"{name}\" }};"
+ ));
+ } else {
+ source.push(format!("export const {name} = {initializer};"));
+ }
+}
+
+fn parse_specifier(specifier: &str) -> Option<(String, String)> {
+ let mut separator_index = specifier.find('/');
+ let mut valid_package_name = true;
+ // let mut is_scoped = false;
+ if specifier.is_empty() {
+ valid_package_name = false;
+ } else if specifier.starts_with('@') {
+ // is_scoped = true;
+ if let Some(index) = separator_index {
+ separator_index = specifier[index + 1..].find('/').map(|i| i + index + 1);
+ } else {
+ valid_package_name = false;
+ }
+ }
+
+ let package_name = if let Some(index) = separator_index {
+ specifier[0..index].to_string()
+ } else {
+ specifier.to_string()
+ };
+
+ // Package name cannot have leading . and cannot have percent-encoding or separators.
+ for ch in package_name.chars() {
+ if ch == '%' || ch == '\\' {
+ valid_package_name = false;
+ break;
+ }
+ }
+
+ if !valid_package_name {
+ return None;
+ }
+
+ let package_subpath = if let Some(index) = separator_index {
+ format!(".{}", specifier.chars().skip(index).collect::<String>())
+ } else {
+ ".".to_string()
+ };
+
+ Some((package_name, package_subpath))
+}
+
+fn file_extension_probe(
+ p: PathBuf,
+ referrer: &Path,
+) -> Result<PathBuf, AnyError> {
+ let p = p.clean();
+ if p.exists() {
+ let file_name = p.file_name().unwrap();
+ let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
+ if p_js.exists() && p_js.is_file() {
+ return Ok(p_js);
+ } else if p.is_dir() {
+ return Ok(p.join("index.js"));
+ } else {
+ return Ok(p);
+ }
+ } else if let Some(file_name) = p.file_name() {
+ let p_js = p.with_file_name(format!("{}.js", file_name.to_str().unwrap()));
+ if p_js.exists() && p_js.is_file() {
+ return Ok(p_js);
+ }
+ }
+ Err(not_found(&p.to_string_lossy(), referrer))
+}
+
+fn not_found(path: &str, referrer: &Path) -> AnyError {
+ let msg = format!(
+ "[ERR_MODULE_NOT_FOUND] Cannot find module \"{}\" imported from \"{}\"",
+ path,
+ referrer.to_string_lossy()
+ );
+ std::io::Error::new(std::io::ErrorKind::NotFound, msg).into()
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -205,4 +657,34 @@ mod tests {
)
);
}
+
+ #[test]
+ fn test_add_export() {
+ let mut temp_var_count = 0;
+ let mut source = vec![];
+
+ let exports = vec!["static", "server", "app", "dashed-export"];
+ for export in exports {
+ add_export(&mut source, export, "init", &mut temp_var_count);
+ }
+ assert_eq!(
+ source,
+ vec![
+ "const __deno_export_1__ = init;".to_string(),
+ "export { __deno_export_1__ as \"static\" };".to_string(),
+ "export const server = init;".to_string(),
+ "export const app = init;".to_string(),
+ "const __deno_export_2__ = init;".to_string(),
+ "export { __deno_export_2__ as \"dashed-export\" };".to_string(),
+ ]
+ )
+ }
+
+ #[test]
+ fn test_parse_specifier() {
+ assert_eq!(
+ parse_specifier("@some-package/core/actions"),
+ Some(("@some-package/core".to_string(), "./actions".to_string()))
+ );
+ }
}