diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2024-11-01 12:27:00 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-01 12:27:00 -0400 |
commit | 826e42a5b5880c974ae019a7a21aade6a718062c (patch) | |
tree | a46502ecc3c73e4f7fc3a4517d83c7b2f3d0c0d3 /cli/cache | |
parent | 4774eab64d5176e997b6431f03f075782321b3d9 (diff) |
fix: improved support for cjs and cts modules (#26558)
* cts support
* better cjs/cts type checking
* deno compile cjs/cts support
* More efficient detect cjs (going towards stabilization)
* Determination of whether .js, .ts, .jsx, or .tsx is cjs or esm is only
done after loading
* Support `import x = require(...);`
Co-authored-by: Bartek IwaĆczuk <biwanczuk@gmail.com>
Diffstat (limited to 'cli/cache')
-rw-r--r-- | cli/cache/cache_db.rs | 6 | ||||
-rw-r--r-- | cli/cache/emit.rs | 2 | ||||
-rw-r--r-- | cli/cache/mod.rs | 101 | ||||
-rw-r--r-- | cli/cache/module_info.rs | 130 | ||||
-rw-r--r-- | cli/cache/parsed_source.rs | 60 |
5 files changed, 139 insertions, 160 deletions
diff --git a/cli/cache/cache_db.rs b/cli/cache/cache_db.rs index b24078f29..329ed2d97 100644 --- a/cli/cache/cache_db.rs +++ b/cli/cache/cache_db.rs @@ -57,7 +57,7 @@ impl rusqlite::types::FromSql for CacheDBHash { } /// What should the cache should do on failure? -#[derive(Default)] +#[derive(Debug, Default)] pub enum CacheFailure { /// Return errors if failure mode otherwise unspecified. #[default] @@ -69,6 +69,7 @@ pub enum CacheFailure { } /// Configuration SQL and other parameters for a [`CacheDB`]. +#[derive(Debug)] pub struct CacheDBConfiguration { /// SQL to run for a new database. pub table_initializer: &'static str, @@ -98,6 +99,7 @@ impl CacheDBConfiguration { } } +#[derive(Debug)] enum ConnectionState { Connected(Connection), Blackhole, @@ -106,7 +108,7 @@ enum ConnectionState { /// A cache database that eagerly initializes itself off-thread, preventing initialization operations /// from blocking the main thread. -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct CacheDB { // TODO(mmastrac): We can probably simplify our thread-safe implementation here conn: Arc<Mutex<OnceCell<ConnectionState>>>, diff --git a/cli/cache/emit.rs b/cli/cache/emit.rs index 74e1c1101..3c9eecfcb 100644 --- a/cli/cache/emit.rs +++ b/cli/cache/emit.rs @@ -10,6 +10,7 @@ use deno_core::unsync::sync::AtomicFlag; use super::DiskCache; /// The cache that stores previously emitted files. +#[derive(Debug)] pub struct EmitCache { disk_cache: DiskCache, emit_failed_flag: AtomicFlag, @@ -91,6 +92,7 @@ impl EmitCache { const LAST_LINE_PREFIX: &str = "\n// denoCacheMetadata="; +#[derive(Debug)] struct EmitFileSerializer { cli_version: &'static str, } diff --git a/cli/cache/mod.rs b/cli/cache/mod.rs index bf8f1b1f0..50fc135dd 100644 --- a/cli/cache/mod.rs +++ b/cli/cache/mod.rs @@ -8,14 +8,9 @@ use crate::file_fetcher::FetchOptions; use crate::file_fetcher::FetchPermissionsOptionRef; use crate::file_fetcher::FileFetcher; use crate::file_fetcher::FileOrRedirect; -use crate::npm::CliNpmResolver; -use crate::resolver::CliNodeResolver; use crate::util::fs::atomic_write_file_with_retries; use crate::util::fs::atomic_write_file_with_retries_and_fs; use crate::util::fs::AtomicWriteFileFsAdapter; -use crate::util::path::specifier_has_extension; -use crate::util::text_encoding::arc_str_to_bytes; -use crate::util::text_encoding::from_utf8_lossy_owned; use deno_ast::MediaType; use deno_core::futures; @@ -25,7 +20,9 @@ use deno_graph::source::CacheInfo; use deno_graph::source::LoadFuture; use deno_graph::source::LoadResponse; use deno_graph::source::Loader; +use deno_runtime::deno_fs; use deno_runtime::deno_permissions::PermissionsContainer; +use node_resolver::InNpmPackageChecker; use std::collections::HashMap; use std::path::Path; use std::path::PathBuf; @@ -60,7 +57,6 @@ pub use fast_check::FastCheckCache; pub use incremental::IncrementalCache; pub use module_info::ModuleInfoCache; pub use node::NodeAnalysisCache; -pub use parsed_source::EsmOrCjsChecker; pub use parsed_source::LazyGraphSourceParser; pub use parsed_source::ParsedSourceCache; @@ -181,46 +177,40 @@ pub struct FetchCacherOptions { pub permissions: PermissionsContainer, /// If we're publishing for `deno publish`. pub is_deno_publish: bool, - pub unstable_detect_cjs: bool, } /// A "wrapper" for the FileFetcher and DiskCache for the Deno CLI that provides /// a concise interface to the DENO_DIR when building module graphs. pub struct FetchCacher { pub file_header_overrides: HashMap<ModuleSpecifier, HashMap<String, String>>, - esm_or_cjs_checker: Arc<EsmOrCjsChecker>, file_fetcher: Arc<FileFetcher>, + fs: Arc<dyn deno_fs::FileSystem>, global_http_cache: Arc<GlobalHttpCache>, - node_resolver: Arc<CliNodeResolver>, - npm_resolver: Arc<dyn CliNpmResolver>, + in_npm_pkg_checker: Arc<dyn InNpmPackageChecker>, module_info_cache: Arc<ModuleInfoCache>, permissions: PermissionsContainer, is_deno_publish: bool, - unstable_detect_cjs: bool, cache_info_enabled: bool, } impl FetchCacher { pub fn new( - esm_or_cjs_checker: Arc<EsmOrCjsChecker>, file_fetcher: Arc<FileFetcher>, + fs: Arc<dyn deno_fs::FileSystem>, global_http_cache: Arc<GlobalHttpCache>, - node_resolver: Arc<CliNodeResolver>, - npm_resolver: Arc<dyn CliNpmResolver>, + in_npm_pkg_checker: Arc<dyn InNpmPackageChecker>, module_info_cache: Arc<ModuleInfoCache>, options: FetchCacherOptions, ) -> Self { Self { file_fetcher, - esm_or_cjs_checker, + fs, global_http_cache, - node_resolver, - npm_resolver, + in_npm_pkg_checker, module_info_cache, file_header_overrides: options.file_header_overrides, permissions: options.permissions, is_deno_publish: options.is_deno_publish, - unstable_detect_cjs: options.unstable_detect_cjs, cache_info_enabled: false, } } @@ -271,70 +261,23 @@ impl Loader for FetchCacher { ) -> LoadFuture { use deno_graph::source::CacheSetting as LoaderCacheSetting; - if specifier.scheme() == "file" { - if specifier.path().contains("/node_modules/") { - // The specifier might be in a completely different symlinked tree than - // what the node_modules url is in (ex. `/my-project-1/node_modules` - // symlinked to `/my-project-2/node_modules`), so first we checked if the path - // is in a node_modules dir to avoid needlessly canonicalizing, then now compare - // against the canonicalized specifier. - let specifier = - crate::node::resolve_specifier_into_node_modules(specifier); - if self.npm_resolver.in_npm_package(&specifier) { - return Box::pin(futures::future::ready(Ok(Some( - LoadResponse::External { specifier }, - )))); - } - } - - // make local CJS modules external to the graph - if specifier_has_extension(specifier, "cjs") { + if specifier.scheme() == "file" + && specifier.path().contains("/node_modules/") + { + // The specifier might be in a completely different symlinked tree than + // what the node_modules url is in (ex. `/my-project-1/node_modules` + // symlinked to `/my-project-2/node_modules`), so first we checked if the path + // is in a node_modules dir to avoid needlessly canonicalizing, then now compare + // against the canonicalized specifier. + let specifier = crate::node::resolve_specifier_into_node_modules( + specifier, + self.fs.as_ref(), + ); + if self.in_npm_pkg_checker.in_npm_package(&specifier) { return Box::pin(futures::future::ready(Ok(Some( - LoadResponse::External { - specifier: specifier.clone(), - }, + LoadResponse::External { specifier }, )))); } - - if self.unstable_detect_cjs && specifier_has_extension(specifier, "js") { - if let Ok(Some(pkg_json)) = - self.node_resolver.get_closest_package_json(specifier) - { - if pkg_json.typ == "commonjs" { - if let Ok(path) = specifier.to_file_path() { - if let Ok(bytes) = std::fs::read(&path) { - let text: Arc<str> = from_utf8_lossy_owned(bytes).into(); - let is_es_module = match self.esm_or_cjs_checker.is_esm( - specifier, - text.clone(), - MediaType::JavaScript, - ) { - Ok(value) => value, - Err(err) => { - return Box::pin(futures::future::ready(Err(err.into()))); - } - }; - if !is_es_module { - self.node_resolver.mark_cjs_resolution(specifier.clone()); - return Box::pin(futures::future::ready(Ok(Some( - LoadResponse::External { - specifier: specifier.clone(), - }, - )))); - } else { - return Box::pin(futures::future::ready(Ok(Some( - LoadResponse::Module { - specifier: specifier.clone(), - content: arc_str_to_bytes(text), - maybe_headers: None, - }, - )))); - } - } - } - } - } - } } if self.is_deno_publish diff --git a/cli/cache/module_info.rs b/cli/cache/module_info.rs index 4dbb01c37..060a6f4f0 100644 --- a/cli/cache/module_info.rs +++ b/cli/cache/module_info.rs @@ -44,18 +44,32 @@ pub static MODULE_INFO_CACHE_DB: CacheDBConfiguration = CacheDBConfiguration { /// A cache of `deno_graph::ModuleInfo` objects. Using this leads to a considerable /// performance improvement because when it exists we can skip parsing a module for /// deno_graph. +#[derive(Debug)] pub struct ModuleInfoCache { conn: CacheDB, + parsed_source_cache: Arc<ParsedSourceCache>, } impl ModuleInfoCache { #[cfg(test)] - pub fn new_in_memory(version: &'static str) -> Self { - Self::new(CacheDB::in_memory(&MODULE_INFO_CACHE_DB, version)) + pub fn new_in_memory( + version: &'static str, + parsed_source_cache: Arc<ParsedSourceCache>, + ) -> Self { + Self::new( + CacheDB::in_memory(&MODULE_INFO_CACHE_DB, version), + parsed_source_cache, + ) } - pub fn new(conn: CacheDB) -> Self { - Self { conn } + pub fn new( + conn: CacheDB, + parsed_source_cache: Arc<ParsedSourceCache>, + ) -> Self { + Self { + conn, + parsed_source_cache, + } } /// Useful for testing: re-create this cache DB with a different current version. @@ -63,6 +77,7 @@ impl ModuleInfoCache { pub(crate) fn recreate_with_version(self, version: &'static str) -> Self { Self { conn: self.conn.recreate_with_version(version), + parsed_source_cache: self.parsed_source_cache, } } @@ -113,13 +128,10 @@ impl ModuleInfoCache { Ok(()) } - pub fn as_module_analyzer<'a>( - &'a self, - parsed_source_cache: &'a Arc<ParsedSourceCache>, - ) -> ModuleInfoCacheModuleAnalyzer<'a> { + pub fn as_module_analyzer(&self) -> ModuleInfoCacheModuleAnalyzer { ModuleInfoCacheModuleAnalyzer { module_info_cache: self, - parsed_source_cache, + parsed_source_cache: &self.parsed_source_cache, } } } @@ -129,31 +141,99 @@ pub struct ModuleInfoCacheModuleAnalyzer<'a> { parsed_source_cache: &'a Arc<ParsedSourceCache>, } -#[async_trait::async_trait(?Send)] -impl<'a> deno_graph::ModuleAnalyzer for ModuleInfoCacheModuleAnalyzer<'a> { - async fn analyze( +impl<'a> ModuleInfoCacheModuleAnalyzer<'a> { + fn load_cached_module_info( &self, specifier: &ModuleSpecifier, - source: Arc<str>, media_type: MediaType, - ) -> Result<ModuleInfo, deno_ast::ParseDiagnostic> { - // attempt to load from the cache - let source_hash = CacheDBHash::from_source(&source); + source_hash: CacheDBHash, + ) -> Option<ModuleInfo> { match self.module_info_cache.get_module_info( specifier, media_type, source_hash, ) { - Ok(Some(info)) => return Ok(info), - Ok(None) => {} + Ok(Some(info)) => Some(info), + Ok(None) => None, Err(err) => { log::debug!( "Error loading module cache info for {}. {:#}", specifier, err ); + None } } + } + + fn save_module_info_to_cache( + &self, + specifier: &ModuleSpecifier, + media_type: MediaType, + source_hash: CacheDBHash, + module_info: &ModuleInfo, + ) { + if let Err(err) = self.module_info_cache.set_module_info( + specifier, + media_type, + source_hash, + module_info, + ) { + log::debug!( + "Error saving module cache info for {}. {:#}", + specifier, + err + ); + } + } + + pub fn analyze_sync( + &self, + specifier: &ModuleSpecifier, + media_type: MediaType, + source: &Arc<str>, + ) -> Result<ModuleInfo, deno_ast::ParseDiagnostic> { + // attempt to load from the cache + let source_hash = CacheDBHash::from_source(source); + if let Some(info) = + self.load_cached_module_info(specifier, media_type, source_hash) + { + return Ok(info); + } + + // otherwise, get the module info from the parsed source cache + let parser = self.parsed_source_cache.as_capturing_parser(); + let analyzer = ParserModuleAnalyzer::new(&parser); + let module_info = + analyzer.analyze_sync(specifier, source.clone(), media_type)?; + + // then attempt to cache it + self.save_module_info_to_cache( + specifier, + media_type, + source_hash, + &module_info, + ); + + Ok(module_info) + } +} + +#[async_trait::async_trait(?Send)] +impl<'a> deno_graph::ModuleAnalyzer for ModuleInfoCacheModuleAnalyzer<'a> { + async fn analyze( + &self, + specifier: &ModuleSpecifier, + source: Arc<str>, + media_type: MediaType, + ) -> Result<ModuleInfo, deno_ast::ParseDiagnostic> { + // attempt to load from the cache + let source_hash = CacheDBHash::from_source(&source); + if let Some(info) = + self.load_cached_module_info(specifier, media_type, source_hash) + { + return Ok(info); + } // otherwise, get the module info from the parsed source cache let module_info = deno_core::unsync::spawn_blocking({ @@ -169,18 +249,12 @@ impl<'a> deno_graph::ModuleAnalyzer for ModuleInfoCacheModuleAnalyzer<'a> { .unwrap()?; // then attempt to cache it - if let Err(err) = self.module_info_cache.set_module_info( + self.save_module_info_to_cache( specifier, media_type, source_hash, &module_info, - ) { - log::debug!( - "Error saving module cache info for {}. {:#}", - specifier, - err - ); - } + ); Ok(module_info) } @@ -202,7 +276,7 @@ fn serialize_media_type(media_type: MediaType) -> i64 { Tsx => 11, Json => 12, Wasm => 13, - TsBuildInfo => 14, + Css => 14, SourceMap => 15, Unknown => 16, } @@ -217,7 +291,7 @@ mod test { #[test] pub fn module_info_cache_general_use() { - let cache = ModuleInfoCache::new_in_memory("1.0.0"); + let cache = ModuleInfoCache::new_in_memory("1.0.0", Default::default()); let specifier1 = ModuleSpecifier::parse("https://localhost/mod.ts").unwrap(); let specifier2 = diff --git a/cli/cache/parsed_source.rs b/cli/cache/parsed_source.rs index df6e45c35..7e819ae99 100644 --- a/cli/cache/parsed_source.rs +++ b/cli/cache/parsed_source.rs @@ -5,12 +5,11 @@ use std::sync::Arc; use deno_ast::MediaType; use deno_ast::ModuleSpecifier; -use deno_ast::ParseDiagnostic; use deno_ast::ParsedSource; use deno_core::parking_lot::Mutex; -use deno_graph::CapturingModuleParser; -use deno_graph::DefaultModuleParser; -use deno_graph::ModuleParser; +use deno_graph::CapturingEsParser; +use deno_graph::DefaultEsParser; +use deno_graph::EsParser; use deno_graph::ParseOptions; use deno_graph::ParsedSourceStore; @@ -47,7 +46,7 @@ impl<'a> LazyGraphSourceParser<'a> { } } -#[derive(Default)] +#[derive(Debug, Default)] pub struct ParsedSourceCache { sources: Mutex<HashMap<ModuleSpecifier, ParsedSource>>, } @@ -58,12 +57,11 @@ impl ParsedSourceCache { module: &deno_graph::JsModule, ) -> Result<ParsedSource, deno_ast::ParseDiagnostic> { let parser = self.as_capturing_parser(); - // this will conditionally parse because it's using a CapturingModuleParser - parser.parse_module(ParseOptions { + // this will conditionally parse because it's using a CapturingEsParser + parser.parse_program(ParseOptions { specifier: &module.specifier, source: module.source.clone(), media_type: module.media_type, - // don't bother enabling because this method is currently only used for vendoring scope_analysis: false, }) } @@ -87,10 +85,9 @@ impl ParsedSourceCache { specifier, source, media_type, - // don't bother enabling because this method is currently only used for emitting scope_analysis: false, }; - DefaultModuleParser.parse_module(options) + DefaultEsParser.parse_program(options) } /// Frees the parsed source from memory. @@ -100,8 +97,8 @@ impl ParsedSourceCache { /// Creates a parser that will reuse a ParsedSource from the store /// if it exists, or else parse. - pub fn as_capturing_parser(&self) -> CapturingModuleParser { - CapturingModuleParser::new(None, self) + pub fn as_capturing_parser(&self) -> CapturingEsParser { + CapturingEsParser::new(None, self) } } @@ -150,42 +147,3 @@ impl deno_graph::ParsedSourceStore for ParsedSourceCache { } } } - -pub struct EsmOrCjsChecker { - parsed_source_cache: Arc<ParsedSourceCache>, -} - -impl EsmOrCjsChecker { - pub fn new(parsed_source_cache: Arc<ParsedSourceCache>) -> Self { - Self { - parsed_source_cache, - } - } - - pub fn is_esm( - &self, - specifier: &ModuleSpecifier, - source: Arc<str>, - media_type: MediaType, - ) -> Result<bool, ParseDiagnostic> { - // todo(dsherret): add a file cache here to avoid parsing with swc on each run - let source = match self.parsed_source_cache.get_parsed_source(specifier) { - Some(source) => source.clone(), - None => { - let source = deno_ast::parse_program(deno_ast::ParseParams { - specifier: specifier.clone(), - text: source, - media_type, - capture_tokens: true, // capture because it's used for cjs export analysis - scope_analysis: false, - maybe_syntax: None, - })?; - self - .parsed_source_cache - .set_parsed_source(specifier.clone(), source.clone()); - source - } - }; - Ok(source.is_module()) - } -} |