diff options
author | haturau <135221985+haturatu@users.noreply.github.com> | 2024-11-20 01:20:47 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-20 01:20:47 +0900 |
commit | 85719a67e59c7aa45bead26e4942d7df8b1b42d4 (patch) | |
tree | face0aecaac53e93ce2f23b53c48859bcf1a36ec /cli/standalone | |
parent | 67697bc2e4a62a9670699fd18ad0dd8efc5bd955 (diff) | |
parent | 186b52731c6bb326c4d32905c5e732d082e83465 (diff) |
Merge branch 'denoland:main' into main
Diffstat (limited to 'cli/standalone')
-rw-r--r-- | cli/standalone/binary.rs | 514 | ||||
-rw-r--r-- | cli/standalone/code_cache.rs | 514 | ||||
-rw-r--r-- | cli/standalone/file_system.rs | 4 | ||||
-rw-r--r-- | cli/standalone/mod.rs | 586 | ||||
-rw-r--r-- | cli/standalone/serialization.rs | 661 | ||||
-rw-r--r-- | cli/standalone/virtual_fs.rs | 118 |
6 files changed, 1965 insertions, 432 deletions
diff --git a/cli/standalone/binary.rs b/cli/standalone/binary.rs index 1290a238f..37753bafc 100644 --- a/cli/standalone/binary.rs +++ b/cli/standalone/binary.rs @@ -9,14 +9,19 @@ use std::ffi::OsString; use std::fs; use std::fs::File; use std::future::Future; +use std::io::ErrorKind; use std::io::Read; use std::io::Seek; use std::io::SeekFrom; use std::io::Write; +use std::ops::Range; use std::path::Path; use std::path::PathBuf; use std::process::Command; +use std::sync::Arc; +use deno_ast::MediaType; +use deno_ast::ModuleKind; use deno_ast::ModuleSpecifier; use deno_config::workspace::PackageJsonDepResolution; use deno_config::workspace::ResolverWorkspaceJsrPackage; @@ -30,13 +35,23 @@ use deno_core::futures::AsyncReadExt; use deno_core::futures::AsyncSeekExt; use deno_core::serde_json; use deno_core::url::Url; +use deno_graph::source::RealFileSystem; +use deno_graph::ModuleGraph; +use deno_npm::resolution::SerializedNpmResolutionSnapshot; +use deno_npm::resolution::SerializedNpmResolutionSnapshotPackage; +use deno_npm::resolution::ValidSerializedNpmResolutionSnapshot; +use deno_npm::NpmPackageId; use deno_npm::NpmSystemInfo; +use deno_runtime::deno_fs; +use deno_runtime::deno_fs::FileSystem; +use deno_runtime::deno_fs::RealFs; +use deno_runtime::deno_io::fs::FsError; use deno_runtime::deno_node::PackageJson; +use deno_runtime::ops::otel::OtelConfig; use deno_semver::npm::NpmVersionReqParseError; use deno_semver::package::PackageReq; use deno_semver::Version; use deno_semver::VersionReqSpecifierParseError; -use eszip::EszipRelativeFileBaseUrl; use indexmap::IndexMap; use log::Level; use serde::Deserialize; @@ -49,10 +64,13 @@ use crate::args::NpmInstallDepsProvider; use crate::args::PermissionFlags; use crate::args::UnstableConfig; use crate::cache::DenoDir; +use crate::cache::FastInsecureHasher; +use crate::emit::Emitter; use crate::file_fetcher::FileFetcher; use crate::http_util::HttpClientProvider; use crate::npm::CliNpmResolver; use crate::npm::InnerCliNpmResolverRef; +use crate::resolver::CjsTracker; use crate::shared::ReleaseChannel; use crate::standalone::virtual_fs::VfsEntry; use crate::util::archive; @@ -60,12 +78,63 @@ use crate::util::fs::canonicalize_path_maybe_not_exists; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; +use super::file_system::DenoCompileFileSystem; +use super::serialization::deserialize_binary_data_section; +use super::serialization::serialize_binary_data_section; +use super::serialization::DenoCompileModuleData; +use super::serialization::DeserializedDataSection; +use super::serialization::RemoteModulesStore; +use super::serialization::RemoteModulesStoreBuilder; use super::virtual_fs::FileBackedVfs; use super::virtual_fs::VfsBuilder; use super::virtual_fs::VfsRoot; use super::virtual_fs::VirtualDirectory; -const MAGIC_TRAILER: &[u8; 8] = b"d3n0l4nd"; +/// A URL that can be designated as the base for relative URLs. +/// +/// After creation, this URL may be used to get the key for a +/// module in the binary. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct StandaloneRelativeFileBaseUrl<'a>(&'a Url); + +impl<'a> From<&'a Url> for StandaloneRelativeFileBaseUrl<'a> { + fn from(url: &'a Url) -> Self { + Self(url) + } +} + +impl<'a> StandaloneRelativeFileBaseUrl<'a> { + pub fn new(url: &'a Url) -> Self { + debug_assert_eq!(url.scheme(), "file"); + Self(url) + } + + /// Gets the module map key of the provided specifier. + /// + /// * Descendant file specifiers will be made relative to the base. + /// * Non-descendant file specifiers will stay as-is (absolute). + /// * Non-file specifiers will stay as-is. + pub fn specifier_key<'b>(&self, target: &'b Url) -> Cow<'b, str> { + if target.scheme() != "file" { + return Cow::Borrowed(target.as_str()); + } + + match self.0.make_relative(target) { + Some(relative) => { + if relative.starts_with("../") { + Cow::Borrowed(target.as_str()) + } else { + Cow::Owned(relative) + } + } + None => Cow::Borrowed(target.as_str()), + } + } + + pub fn inner(&self) -> &Url { + self.0 + } +} #[derive(Deserialize, Serialize)] pub enum NodeModules { @@ -106,6 +175,7 @@ pub struct SerializedWorkspaceResolver { pub struct Metadata { pub argv: Vec<String>, pub seed: Option<u64>, + pub code_cache_key: Option<u64>, pub permissions: PermissionFlags, pub location: Option<Url>, pub v8_flags: Vec<String>, @@ -118,80 +188,26 @@ pub struct Metadata { pub entrypoint_key: String, pub node_modules: Option<NodeModules>, pub unstable_config: UnstableConfig, -} - -pub fn load_npm_vfs(root_dir_path: PathBuf) -> Result<FileBackedVfs, AnyError> { - let data = libsui::find_section("d3n0l4nd").unwrap(); - - // We do the first part sync so it can complete quickly - let trailer: [u8; TRAILER_SIZE] = data[0..TRAILER_SIZE].try_into().unwrap(); - let trailer = match Trailer::parse(&trailer)? { - None => panic!("Could not find trailer"), - Some(trailer) => trailer, - }; - let data = &data[TRAILER_SIZE..]; - - let vfs_data = - &data[trailer.npm_vfs_pos as usize..trailer.npm_files_pos as usize]; - let mut dir: VirtualDirectory = serde_json::from_slice(vfs_data)?; - - // align the name of the directory with the root dir - dir.name = root_dir_path - .file_name() - .unwrap() - .to_string_lossy() - .to_string(); - - let fs_root = VfsRoot { - dir, - root_path: root_dir_path, - start_file_offset: trailer.npm_files_pos, - }; - Ok(FileBackedVfs::new(data.to_vec(), fs_root)) + pub otel_config: Option<OtelConfig>, // None means disabled. } fn write_binary_bytes( mut file_writer: File, original_bin: Vec<u8>, metadata: &Metadata, - eszip: eszip::EszipV2, - npm_vfs: Option<&VirtualDirectory>, - npm_files: &Vec<Vec<u8>>, + npm_snapshot: Option<SerializedNpmResolutionSnapshot>, + remote_modules: &RemoteModulesStoreBuilder, + vfs: VfsBuilder, compile_flags: &CompileFlags, ) -> Result<(), AnyError> { - let metadata = serde_json::to_string(metadata)?.as_bytes().to_vec(); - let npm_vfs = serde_json::to_string(&npm_vfs)?.as_bytes().to_vec(); - let eszip_archive = eszip.into_bytes(); - - let mut writer = Vec::new(); - - // write the trailer, which includes the positions - // of the data blocks in the file - writer.write_all(&{ - let metadata_pos = eszip_archive.len() as u64; - let npm_vfs_pos = metadata_pos + (metadata.len() as u64); - let npm_files_pos = npm_vfs_pos + (npm_vfs.len() as u64); - Trailer { - eszip_pos: 0, - metadata_pos, - npm_vfs_pos, - npm_files_pos, - } - .as_bytes() - })?; - - writer.write_all(&eszip_archive)?; - writer.write_all(&metadata)?; - writer.write_all(&npm_vfs)?; - for file in npm_files { - writer.write_all(file)?; - } + let data_section_bytes = + serialize_binary_data_section(metadata, npm_snapshot, remote_modules, vfs)?; let target = compile_flags.resolve_target(); if target.contains("linux") { libsui::Elf::new(&original_bin).append( "d3n0l4nd", - &writer, + &data_section_bytes, &mut file_writer, )?; } else if target.contains("windows") { @@ -201,11 +217,11 @@ fn write_binary_bytes( pe = pe.set_icon(&icon)?; } - pe.write_resource("d3n0l4nd", writer)? + pe.write_resource("d3n0l4nd", data_section_bytes)? .build(&mut file_writer)?; } else if target.contains("darwin") { libsui::Macho::from(original_bin)? - .write_section("d3n0l4nd", writer)? + .write_section("d3n0l4nd", data_section_bytes)? .build_and_sign(&mut file_writer)?; } Ok(()) @@ -221,6 +237,67 @@ pub fn is_standalone_binary(exe_path: &Path) -> bool { || libsui::utils::is_macho(&data) } +pub struct StandaloneData { + pub fs: Arc<dyn deno_fs::FileSystem>, + pub metadata: Metadata, + pub modules: StandaloneModules, + pub npm_snapshot: Option<ValidSerializedNpmResolutionSnapshot>, + pub root_path: PathBuf, + pub vfs: Arc<FileBackedVfs>, +} + +pub struct StandaloneModules { + remote_modules: RemoteModulesStore, + vfs: Arc<FileBackedVfs>, +} + +impl StandaloneModules { + pub fn resolve_specifier<'a>( + &'a self, + specifier: &'a ModuleSpecifier, + ) -> Result<Option<&'a ModuleSpecifier>, AnyError> { + if specifier.scheme() == "file" { + Ok(Some(specifier)) + } else { + self.remote_modules.resolve_specifier(specifier) + } + } + + pub fn has_file(&self, path: &Path) -> bool { + self.vfs.file_entry(path).is_ok() + } + + pub fn read<'a>( + &'a self, + specifier: &'a ModuleSpecifier, + ) -> Result<Option<DenoCompileModuleData<'a>>, AnyError> { + if specifier.scheme() == "file" { + let path = deno_path_util::url_to_file_path(specifier)?; + let bytes = match self.vfs.file_entry(&path) { + Ok(entry) => self.vfs.read_file_all(entry)?, + Err(err) if err.kind() == ErrorKind::NotFound => { + let bytes = match RealFs.read_file_sync(&path, None) { + Ok(bytes) => bytes, + Err(FsError::Io(err)) if err.kind() == ErrorKind::NotFound => { + return Ok(None) + } + Err(err) => return Err(err.into()), + }; + Cow::Owned(bytes) + } + Err(err) => return Err(err.into()), + }; + Ok(Some(DenoCompileModuleData { + media_type: MediaType::from_specifier(specifier), + specifier, + data: bytes, + })) + } else { + self.remote_modules.read(specifier) + } + } +} + /// This function will try to run this binary as a standalone binary /// produced by `deno compile`. It determines if this is a standalone /// binary by skipping over the trailer width at the end of the file, @@ -228,110 +305,67 @@ pub fn is_standalone_binary(exe_path: &Path) -> bool { /// the bundle is executed. If not, this function exits with `Ok(None)`. pub fn extract_standalone( cli_args: Cow<Vec<OsString>>, -) -> Result< - Option<impl Future<Output = Result<(Metadata, eszip::EszipV2), AnyError>>>, - AnyError, -> { +) -> Result<Option<StandaloneData>, AnyError> { let Some(data) = libsui::find_section("d3n0l4nd") else { return Ok(None); }; - // We do the first part sync so it can complete quickly - let trailer = match Trailer::parse(&data[0..TRAILER_SIZE])? { + let DeserializedDataSection { + mut metadata, + npm_snapshot, + remote_modules, + mut vfs_dir, + vfs_files_data, + } = match deserialize_binary_data_section(data)? { + Some(data_section) => data_section, None => return Ok(None), - Some(trailer) => trailer, }; + let root_path = { + let maybe_current_exe = std::env::current_exe().ok(); + let current_exe_name = maybe_current_exe + .as_ref() + .and_then(|p| p.file_name()) + .map(|p| p.to_string_lossy()) + // should never happen + .unwrap_or_else(|| Cow::Borrowed("binary")); + std::env::temp_dir().join(format!("deno-compile-{}", current_exe_name)) + }; let cli_args = cli_args.into_owned(); - // If we have an eszip, read it out - Ok(Some(async move { - let bufreader = - deno_core::futures::io::BufReader::new(&data[TRAILER_SIZE..]); - - let (eszip, loader) = eszip::EszipV2::parse(bufreader) - .await - .context("Failed to parse eszip header")?; - - let bufreader = loader.await.context("Failed to parse eszip archive")?; - - let mut metadata = String::new(); - - bufreader - .take(trailer.metadata_len()) - .read_to_string(&mut metadata) - .await - .context("Failed to read metadata from the current executable")?; - - let mut metadata: Metadata = serde_json::from_str(&metadata).unwrap(); - metadata.argv.reserve(cli_args.len() - 1); - for arg in cli_args.into_iter().skip(1) { - metadata.argv.push(arg.into_string().unwrap()); - } - - Ok((metadata, eszip)) - })) -} - -const TRAILER_SIZE: usize = std::mem::size_of::<Trailer>() + 8; // 8 bytes for the magic trailer string - -struct Trailer { - eszip_pos: u64, - metadata_pos: u64, - npm_vfs_pos: u64, - npm_files_pos: u64, -} - -impl Trailer { - pub fn parse(trailer: &[u8]) -> Result<Option<Trailer>, AnyError> { - let (magic_trailer, rest) = trailer.split_at(8); - if magic_trailer != MAGIC_TRAILER { - return Ok(None); - } - - let (eszip_archive_pos, rest) = rest.split_at(8); - let (metadata_pos, rest) = rest.split_at(8); - let (npm_vfs_pos, npm_files_pos) = rest.split_at(8); - let eszip_archive_pos = u64_from_bytes(eszip_archive_pos)?; - let metadata_pos = u64_from_bytes(metadata_pos)?; - let npm_vfs_pos = u64_from_bytes(npm_vfs_pos)?; - let npm_files_pos = u64_from_bytes(npm_files_pos)?; - Ok(Some(Trailer { - eszip_pos: eszip_archive_pos, - metadata_pos, - npm_vfs_pos, - npm_files_pos, - })) - } - - pub fn metadata_len(&self) -> u64 { - self.npm_vfs_pos - self.metadata_pos - } - - pub fn npm_vfs_len(&self) -> u64 { - self.npm_files_pos - self.npm_vfs_pos + metadata.argv.reserve(cli_args.len() - 1); + for arg in cli_args.into_iter().skip(1) { + metadata.argv.push(arg.into_string().unwrap()); } - - pub fn as_bytes(&self) -> Vec<u8> { - let mut trailer = MAGIC_TRAILER.to_vec(); - trailer.write_all(&self.eszip_pos.to_be_bytes()).unwrap(); - trailer.write_all(&self.metadata_pos.to_be_bytes()).unwrap(); - trailer.write_all(&self.npm_vfs_pos.to_be_bytes()).unwrap(); - trailer - .write_all(&self.npm_files_pos.to_be_bytes()) - .unwrap(); - trailer - } -} - -fn u64_from_bytes(arr: &[u8]) -> Result<u64, AnyError> { - let fixed_arr: &[u8; 8] = arr - .try_into() - .context("Failed to convert the buffer into a fixed-size array")?; - Ok(u64::from_be_bytes(*fixed_arr)) + let vfs = { + // align the name of the directory with the root dir + vfs_dir.name = root_path.file_name().unwrap().to_string_lossy().to_string(); + + let fs_root = VfsRoot { + dir: vfs_dir, + root_path: root_path.clone(), + start_file_offset: 0, + }; + Arc::new(FileBackedVfs::new(Cow::Borrowed(vfs_files_data), fs_root)) + }; + let fs: Arc<dyn deno_fs::FileSystem> = + Arc::new(DenoCompileFileSystem::new(vfs.clone())); + Ok(Some(StandaloneData { + fs, + metadata, + modules: StandaloneModules { + remote_modules, + vfs: vfs.clone(), + }, + npm_snapshot, + root_path, + vfs, + })) } pub struct DenoCompileBinaryWriter<'a> { + cjs_tracker: &'a CjsTracker, deno_dir: &'a DenoDir, + emitter: &'a Emitter, file_fetcher: &'a FileFetcher, http_client_provider: &'a HttpClientProvider, npm_resolver: &'a dyn CliNpmResolver, @@ -342,7 +376,9 @@ pub struct DenoCompileBinaryWriter<'a> { impl<'a> DenoCompileBinaryWriter<'a> { #[allow(clippy::too_many_arguments)] pub fn new( + cjs_tracker: &'a CjsTracker, deno_dir: &'a DenoDir, + emitter: &'a Emitter, file_fetcher: &'a FileFetcher, http_client_provider: &'a HttpClientProvider, npm_resolver: &'a dyn CliNpmResolver, @@ -350,7 +386,9 @@ impl<'a> DenoCompileBinaryWriter<'a> { npm_system_info: NpmSystemInfo, ) -> Self { Self { + cjs_tracker, deno_dir, + emitter, file_fetcher, http_client_provider, npm_resolver, @@ -362,8 +400,8 @@ impl<'a> DenoCompileBinaryWriter<'a> { pub async fn write_bin( &self, writer: File, - eszip: eszip::EszipV2, - root_dir_url: EszipRelativeFileBaseUrl<'_>, + graph: &ModuleGraph, + root_dir_url: StandaloneRelativeFileBaseUrl<'_>, entrypoint: &ModuleSpecifier, compile_flags: &CompileFlags, cli_options: &CliOptions, @@ -390,15 +428,17 @@ impl<'a> DenoCompileBinaryWriter<'a> { ) } } - self.write_standalone_binary( - writer, - original_binary, - eszip, - root_dir_url, - entrypoint, - cli_options, - compile_flags, - ) + self + .write_standalone_binary( + writer, + original_binary, + graph, + root_dir_url, + entrypoint, + cli_options, + compile_flags, + ) + .await } async fn get_base_binary( @@ -468,14 +508,18 @@ impl<'a> DenoCompileBinaryWriter<'a> { self .http_client_provider .get_or_create()? - .download_with_progress(download_url.parse()?, None, &progress) + .download_with_progress_and_retries( + download_url.parse()?, + None, + &progress, + ) .await? }; let bytes = match maybe_bytes { Some(bytes) => bytes, None => { log::info!("Download could not be found, aborting"); - std::process::exit(1) + deno_runtime::exit(1); } }; @@ -489,12 +533,12 @@ impl<'a> DenoCompileBinaryWriter<'a> { /// This functions creates a standalone deno binary by appending a bundle /// and magic trailer to the currently executing binary. #[allow(clippy::too_many_arguments)] - fn write_standalone_binary( + async fn write_standalone_binary( &self, writer: File, original_bin: Vec<u8>, - mut eszip: eszip::EszipV2, - root_dir_url: EszipRelativeFileBaseUrl<'_>, + graph: &ModuleGraph, + root_dir_url: StandaloneRelativeFileBaseUrl<'_>, entrypoint: &ModuleSpecifier, cli_options: &CliOptions, compile_flags: &CompileFlags, @@ -508,19 +552,17 @@ impl<'a> DenoCompileBinaryWriter<'a> { None => None, }; let root_path = root_dir_url.inner().to_file_path().unwrap(); - let (npm_vfs, npm_files, node_modules) = match self.npm_resolver.as_inner() + let (maybe_npm_vfs, node_modules, npm_snapshot) = match self + .npm_resolver + .as_inner() { InnerCliNpmResolverRef::Managed(managed) => { let snapshot = managed.serialized_valid_snapshot_for_system(&self.npm_system_info); if !snapshot.as_serialized().packages.is_empty() { - let (root_dir, files) = self - .build_vfs(&root_path, cli_options)? - .into_dir_and_files(); - eszip.add_npm_snapshot(snapshot); + let npm_vfs_builder = self.build_npm_vfs(&root_path, cli_options)?; ( - Some(root_dir), - files, + Some(npm_vfs_builder), Some(NodeModules::Managed { node_modules_dir: self.npm_resolver.root_node_modules_path().map( |path| { @@ -532,18 +574,16 @@ impl<'a> DenoCompileBinaryWriter<'a> { }, ), }), + Some(snapshot), ) } else { - (None, Vec::new(), None) + (None, None, None) } } InnerCliNpmResolverRef::Byonm(resolver) => { - let (root_dir, files) = self - .build_vfs(&root_path, cli_options)? - .into_dir_and_files(); + let npm_vfs_builder = self.build_npm_vfs(&root_path, cli_options)?; ( - Some(root_dir), - files, + Some(npm_vfs_builder), Some(NodeModules::Byonm { root_node_modules_dir: resolver.root_node_modules_path().map( |node_modules_dir| { @@ -556,14 +596,91 @@ impl<'a> DenoCompileBinaryWriter<'a> { }, ), }), + None, ) } }; + let mut vfs = if let Some(npm_vfs) = maybe_npm_vfs { + npm_vfs + } else { + VfsBuilder::new(root_path.clone())? + }; + let mut remote_modules_store = RemoteModulesStoreBuilder::default(); + let mut code_cache_key_hasher = if cli_options.code_cache_enabled() { + Some(FastInsecureHasher::new_deno_versioned()) + } else { + None + }; + for module in graph.modules() { + if module.specifier().scheme() == "data" { + continue; // don't store data urls as an entry as they're in the code + } + if let Some(hasher) = &mut code_cache_key_hasher { + if let Some(source) = module.source() { + hasher.write(module.specifier().as_str().as_bytes()); + hasher.write(source.as_bytes()); + } + } + let (maybe_source, media_type) = match module { + deno_graph::Module::Js(m) => { + let source = if m.media_type.is_emittable() { + let is_cjs = self.cjs_tracker.is_cjs_with_known_is_script( + &m.specifier, + m.media_type, + m.is_script, + )?; + let module_kind = ModuleKind::from_is_cjs(is_cjs); + let source = self + .emitter + .emit_parsed_source( + &m.specifier, + m.media_type, + module_kind, + &m.source, + ) + .await?; + source.into_bytes() + } else { + m.source.as_bytes().to_vec() + }; + (Some(source), m.media_type) + } + deno_graph::Module::Json(m) => { + (Some(m.source.as_bytes().to_vec()), m.media_type) + } + deno_graph::Module::Npm(_) + | deno_graph::Module::Node(_) + | deno_graph::Module::External(_) => (None, MediaType::Unknown), + }; + if module.specifier().scheme() == "file" { + let file_path = deno_path_util::url_to_file_path(module.specifier())?; + vfs + .add_file_with_data( + &file_path, + match maybe_source { + Some(source) => source, + None => RealFs.read_file_sync(&file_path, None)?, + }, + ) + .with_context(|| { + format!("Failed adding '{}'", file_path.display()) + })?; + } else if let Some(source) = maybe_source { + remote_modules_store.add(module.specifier(), media_type, source); + } + } + remote_modules_store.add_redirects(&graph.redirects); let env_vars_from_env_file = match cli_options.env_file_name() { - Some(env_filename) => { - log::info!("{} Environment variables from the file \"{}\" were embedded in the generated executable file", crate::colors::yellow("Warning"), env_filename); - get_file_env_vars(env_filename.to_string())? + Some(env_filenames) => { + let mut aggregated_env_vars = IndexMap::new(); + for env_filename in env_filenames.iter().rev() { + log::info!("{} Environment variables from the file \"{}\" were embedded in the generated executable file", crate::colors::yellow("Warning"), env_filename); + + let env_vars = get_file_env_vars(env_filename.to_string())?; + aggregated_env_vars.extend(env_vars); + } + aggregated_env_vars } None => Default::default(), }; @@ -571,6 +688,7 @@ impl<'a> DenoCompileBinaryWriter<'a> { let metadata = Metadata { argv: compile_flags.args.clone(), seed: cli_options.seed(), + code_cache_key: code_cache_key_hasher.map(|h| h.finish()), location: cli_options.location_flag().clone(), permissions: cli_options.permission_flags().clone(), v8_flags: cli_options.v8_flags().clone(), @@ -625,20 +743,21 @@ impl<'a> DenoCompileBinaryWriter<'a> { sloppy_imports: cli_options.unstable_sloppy_imports(), features: cli_options.unstable_features(), }, + otel_config: cli_options.otel_config(), }; write_binary_bytes( writer, original_bin, &metadata, - eszip, - npm_vfs.as_ref(), - &npm_files, + npm_snapshot.map(|s| s.into_serialized()), + &remote_modules_store, + vfs, compile_flags, ) } - fn build_vfs( + fn build_npm_vfs( &self, root_path: &Path, cli_options: &CliOptions, @@ -659,8 +778,9 @@ impl<'a> DenoCompileBinaryWriter<'a> { } else { // DO NOT include the user's registry url as it may contain credentials, // but also don't make this dependent on the registry url - let root_path = npm_resolver.global_cache_root_folder(); - let mut builder = VfsBuilder::new(root_path)?; + let global_cache_root_path = npm_resolver.global_cache_root_path(); + let mut builder = + VfsBuilder::new(global_cache_root_path.to_path_buf())?; let mut packages = npm_resolver.all_system_packages(&self.npm_system_info); packages.sort_by(|a, b| a.id.cmp(&b.id)); // determinism @@ -670,12 +790,12 @@ impl<'a> DenoCompileBinaryWriter<'a> { builder.add_dir_recursive(&folder)?; } - // Flatten all the registries folders into a single "node_modules/localhost" folder + // Flatten all the registries folders into a single ".deno_compile_node_modules/localhost" folder // that will be used by denort when loading the npm cache. This avoids us exposing // the user's private registry information and means we don't have to bother // serializing all the different registry config into the binary. builder.with_root_dir(|root_dir| { - root_dir.name = "node_modules".to_string(); + root_dir.name = ".deno_compile_node_modules".to_string(); let mut new_entries = Vec::with_capacity(root_dir.entries.len()); let mut localhost_entries = IndexMap::new(); for entry in std::mem::take(&mut root_dir.entries) { @@ -710,6 +830,8 @@ impl<'a> DenoCompileBinaryWriter<'a> { root_dir.entries = new_entries; }); + builder.set_new_root_path(root_path.to_path_buf())?; + Ok(builder) } } diff --git a/cli/standalone/code_cache.rs b/cli/standalone/code_cache.rs new file mode 100644 index 000000000..25b490544 --- /dev/null +++ b/cli/standalone/code_cache.rs @@ -0,0 +1,514 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::io::BufReader; +use std::io::BufWriter; +use std::io::Read; +use std::io::Write; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Arc; + +use deno_ast::ModuleSpecifier; +use deno_core::anyhow::bail; +use deno_core::error::AnyError; +use deno_core::parking_lot::Mutex; +use deno_core::unsync::sync::AtomicFlag; +use deno_runtime::code_cache::CodeCache; +use deno_runtime::code_cache::CodeCacheType; + +use crate::cache::FastInsecureHasher; +use crate::util::path::get_atomic_file_path; +use crate::worker::CliCodeCache; + +enum CodeCacheStrategy { + FirstRun(FirstRunCodeCacheStrategy), + SubsequentRun(SubsequentRunCodeCacheStrategy), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DenoCompileCodeCacheEntry { + pub source_hash: u64, + pub data: Vec<u8>, +} + +pub struct DenoCompileCodeCache { + strategy: CodeCacheStrategy, +} + +impl DenoCompileCodeCache { + pub fn new(file_path: PathBuf, cache_key: u64) -> Self { + // attempt to deserialize the cache data + match deserialize(&file_path, cache_key) { + Ok(data) => { + log::debug!("Loaded {} code cache entries", data.len()); + Self { + strategy: CodeCacheStrategy::SubsequentRun( + SubsequentRunCodeCacheStrategy { + is_finished: AtomicFlag::lowered(), + data: Mutex::new(data), + }, + ), + } + } + Err(err) => { + log::debug!("Failed to deserialize code cache: {:#}", err); + Self { + strategy: CodeCacheStrategy::FirstRun(FirstRunCodeCacheStrategy { + cache_key, + file_path, + is_finished: AtomicFlag::lowered(), + data: Mutex::new(FirstRunCodeCacheData { + cache: HashMap::new(), + add_count: 0, + }), + }), + } + } + } + } +} + +impl CodeCache for DenoCompileCodeCache { + fn get_sync( + &self, + specifier: &ModuleSpecifier, + code_cache_type: CodeCacheType, + source_hash: u64, + ) -> Option<Vec<u8>> { + match &self.strategy { + CodeCacheStrategy::FirstRun(strategy) => { + if !strategy.is_finished.is_raised() { + // we keep track of how many times the cache is requested + // then serialize the cache when we get that number of + // "set" calls + strategy.data.lock().add_count += 1; + } + None + } + CodeCacheStrategy::SubsequentRun(strategy) => { + if strategy.is_finished.is_raised() { + return None; + } + strategy.take_from_cache(specifier, code_cache_type, source_hash) + } + } + } + + fn set_sync( + &self, + specifier: ModuleSpecifier, + code_cache_type: CodeCacheType, + source_hash: u64, + bytes: &[u8], + ) { + match &self.strategy { + CodeCacheStrategy::FirstRun(strategy) => { + if strategy.is_finished.is_raised() { + return; + } + + let data_to_serialize = { + let mut data = strategy.data.lock(); + data.cache.insert( + (specifier.to_string(), code_cache_type), + DenoCompileCodeCacheEntry { + source_hash, + data: bytes.to_vec(), + }, + ); + if data.add_count != 0 { + data.add_count -= 1; + } + if data.add_count == 0 { + // don't allow using the cache anymore + strategy.is_finished.raise(); + if data.cache.is_empty() { + None + } else { + Some(std::mem::take(&mut data.cache)) + } + } else { + None + } + }; + if let Some(cache_data) = &data_to_serialize { + strategy.write_cache_data(cache_data); + } + } + CodeCacheStrategy::SubsequentRun(_) => { + // do nothing + } + } + } +} + +impl CliCodeCache for DenoCompileCodeCache { + fn enabled(&self) -> bool { + match &self.strategy { + CodeCacheStrategy::FirstRun(strategy) => { + !strategy.is_finished.is_raised() + } + CodeCacheStrategy::SubsequentRun(strategy) => { + !strategy.is_finished.is_raised() + } + } + } + + fn as_code_cache(self: Arc<Self>) -> Arc<dyn CodeCache> { + self + } +} + +type CodeCacheKey = (String, CodeCacheType); + +struct FirstRunCodeCacheData { + cache: HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, + add_count: usize, +} + +struct FirstRunCodeCacheStrategy { + cache_key: u64, + file_path: PathBuf, + is_finished: AtomicFlag, + data: Mutex<FirstRunCodeCacheData>, +} + +impl FirstRunCodeCacheStrategy { + fn write_cache_data( + &self, + cache_data: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, + ) { + let count = cache_data.len(); + let temp_file = get_atomic_file_path(&self.file_path); + match serialize(&temp_file, self.cache_key, cache_data) { + Ok(()) => { + if let Err(err) = std::fs::rename(&temp_file, &self.file_path) { + log::debug!("Failed to rename code cache: {}", err); + } else { + log::debug!("Serialized {} code cache entries", count); + } + } + Err(err) => { + let _ = std::fs::remove_file(&temp_file); + log::debug!("Failed to serialize code cache: {}", err); + } + } + } +} + +struct SubsequentRunCodeCacheStrategy { + is_finished: AtomicFlag, + data: Mutex<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>>, +} + +impl SubsequentRunCodeCacheStrategy { + fn take_from_cache( + &self, + specifier: &ModuleSpecifier, + code_cache_type: CodeCacheType, + source_hash: u64, + ) -> Option<Vec<u8>> { + let mut data = self.data.lock(); + // todo(dsherret): how to avoid the clone here? + let entry = data.remove(&(specifier.to_string(), code_cache_type))?; + if entry.source_hash != source_hash { + return None; + } + if data.is_empty() { + self.is_finished.raise(); + } + Some(entry.data) + } +} + +/// File format: +/// - <header> +/// - <cache key> +/// - <u32: number of entries> +/// - <[entry length]> - u64 * number of entries +/// - <[entry]> +/// - <[u8]: entry data> +/// - <String: specifier> +/// - <u8>: code cache type +/// - <u32: specifier length> +/// - <u64: source hash> +/// - <u64: entry data hash> +fn serialize( + file_path: &Path, + cache_key: u64, + cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, +) -> Result<(), AnyError> { + let cache_file = std::fs::OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(file_path)?; + let mut writer = BufWriter::new(cache_file); + serialize_with_writer(&mut writer, cache_key, cache) +} + +fn serialize_with_writer<T: Write>( + writer: &mut BufWriter<T>, + cache_key: u64, + cache: &HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, +) -> Result<(), AnyError> { + // header + writer.write_all(&cache_key.to_le_bytes())?; + writer.write_all(&(cache.len() as u32).to_le_bytes())?; + // lengths of each entry + for ((specifier, _), entry) in cache { + let len: u64 = + entry.data.len() as u64 + specifier.len() as u64 + 1 + 4 + 8 + 8; + writer.write_all(&len.to_le_bytes())?; + } + // entries + for ((specifier, code_cache_type), entry) in cache { + writer.write_all(&entry.data)?; + writer.write_all(&[match code_cache_type { + CodeCacheType::EsModule => 0, + CodeCacheType::Script => 1, + }])?; + writer.write_all(specifier.as_bytes())?; + writer.write_all(&(specifier.len() as u32).to_le_bytes())?; + writer.write_all(&entry.source_hash.to_le_bytes())?; + let hash: u64 = FastInsecureHasher::new_without_deno_version() + .write(&entry.data) + .finish(); + writer.write_all(&hash.to_le_bytes())?; + } + + writer.flush()?; + + Ok(()) +} + +fn deserialize( + file_path: &Path, + expected_cache_key: u64, +) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> { + let cache_file = std::fs::File::open(file_path)?; + let mut reader = BufReader::new(cache_file); + deserialize_with_reader(&mut reader, expected_cache_key) +} + +fn deserialize_with_reader<T: Read>( + reader: &mut BufReader<T>, + expected_cache_key: u64, +) -> Result<HashMap<CodeCacheKey, DenoCompileCodeCacheEntry>, AnyError> { + // it's very important to use this below so that a corrupt cache file + // doesn't cause a memory allocation error + fn new_vec_sized<T: Clone>( + capacity: usize, + default_value: T, + ) -> Result<Vec<T>, AnyError> { + let mut vec = Vec::new(); + vec.try_reserve(capacity)?; + vec.resize(capacity, default_value); + Ok(vec) + } + + fn try_subtract(a: usize, b: usize) -> Result<usize, AnyError> { + if a < b { + bail!("Integer underflow"); + } + Ok(a - b) + } + + let mut header_bytes = vec![0; 8 + 4]; + reader.read_exact(&mut header_bytes)?; + let actual_cache_key = u64::from_le_bytes(header_bytes[..8].try_into()?); + if actual_cache_key != expected_cache_key { + // cache bust + bail!("Cache key mismatch"); + } + let len = u32::from_le_bytes(header_bytes[8..].try_into()?) as usize; + // read the lengths for each entry found in the file + let entry_len_bytes_capacity = len * 8; + let mut entry_len_bytes = new_vec_sized(entry_len_bytes_capacity, 0)?; + reader.read_exact(&mut entry_len_bytes)?; + let mut lengths = Vec::new(); + lengths.try_reserve(len)?; + for i in 0..len { + let pos = i * 8; + lengths.push( + u64::from_le_bytes(entry_len_bytes[pos..pos + 8].try_into()?) as usize, + ); + } + + let mut map = HashMap::new(); + map.try_reserve(len)?; + for len in lengths { + let mut buffer = new_vec_sized(len, 0)?; + reader.read_exact(&mut buffer)?; + let entry_data_hash_start_pos = try_subtract(buffer.len(), 8)?; + let expected_entry_data_hash = + u64::from_le_bytes(buffer[entry_data_hash_start_pos..].try_into()?); + let source_hash_start_pos = try_subtract(entry_data_hash_start_pos, 8)?; + let source_hash = u64::from_le_bytes( + buffer[source_hash_start_pos..entry_data_hash_start_pos].try_into()?, + ); + let specifier_end_pos = try_subtract(source_hash_start_pos, 4)?; + let specifier_len = u32::from_le_bytes( + buffer[specifier_end_pos..source_hash_start_pos].try_into()?, + ) as usize; + let specifier_start_pos = try_subtract(specifier_end_pos, specifier_len)?; + let specifier = String::from_utf8( + buffer[specifier_start_pos..specifier_end_pos].to_vec(), + )?; + let code_cache_type_pos = try_subtract(specifier_start_pos, 1)?; + let code_cache_type = match buffer[code_cache_type_pos] { + 0 => CodeCacheType::EsModule, + 1 => CodeCacheType::Script, + _ => bail!("Invalid code cache type"), + }; + buffer.truncate(code_cache_type_pos); + let actual_entry_data_hash: u64 = + FastInsecureHasher::new_without_deno_version() + .write(&buffer) + .finish(); + if expected_entry_data_hash != actual_entry_data_hash { + bail!("Hash mismatch.") + } + map.insert( + (specifier, code_cache_type), + DenoCompileCodeCacheEntry { + source_hash, + data: buffer, + }, + ); + } + + Ok(map) +} + +#[cfg(test)] +mod test { + use test_util::TempDir; + + use super::*; + use std::fs::File; + + #[test] + fn serialize_deserialize() { + let cache_key = 123456; + let cache = { + let mut cache = HashMap::new(); + cache.insert( + ("specifier1".to_string(), CodeCacheType::EsModule), + DenoCompileCodeCacheEntry { + source_hash: 1, + data: vec![1, 2, 3], + }, + ); + cache.insert( + ("specifier2".to_string(), CodeCacheType::EsModule), + DenoCompileCodeCacheEntry { + source_hash: 2, + data: vec![4, 5, 6], + }, + ); + cache.insert( + ("specifier2".to_string(), CodeCacheType::Script), + DenoCompileCodeCacheEntry { + source_hash: 2, + data: vec![6, 5, 1], + }, + ); + cache + }; + let mut buffer = Vec::new(); + serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache) + .unwrap(); + let deserialized = + deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key) + .unwrap(); + assert_eq!(cache, deserialized); + } + + #[test] + fn serialize_deserialize_empty() { + let cache_key = 1234; + let cache = HashMap::new(); + let mut buffer = Vec::new(); + serialize_with_writer(&mut BufWriter::new(&mut buffer), cache_key, &cache) + .unwrap(); + let deserialized = + deserialize_with_reader(&mut BufReader::new(&buffer[..]), cache_key) + .unwrap(); + assert_eq!(cache, deserialized); + } + + #[test] + fn serialize_deserialize_corrupt() { + let buffer = "corrupttestingtestingtesting".as_bytes().to_vec(); + let err = deserialize_with_reader(&mut BufReader::new(&buffer[..]), 1234) + .unwrap_err(); + assert_eq!(err.to_string(), "Cache key mismatch"); + } + + #[test] + fn code_cache() { + let temp_dir = TempDir::new(); + let file_path = temp_dir.path().join("cache.bin").to_path_buf(); + let url1 = ModuleSpecifier::parse("https://deno.land/example1.js").unwrap(); + let url2 = ModuleSpecifier::parse("https://deno.land/example2.js").unwrap(); + // first run + { + let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234); + assert!(code_cache + .get_sync(&url1, CodeCacheType::EsModule, 0) + .is_none()); + assert!(code_cache + .get_sync(&url2, CodeCacheType::EsModule, 1) + .is_none()); + assert!(code_cache.enabled()); + code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[1, 2, 3]); + assert!(code_cache.enabled()); + assert!(!file_path.exists()); + code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[2, 1, 3]); + assert!(file_path.exists()); // now the new code cache exists + assert!(!code_cache.enabled()); // no longer enabled + } + // second run + { + let code_cache = DenoCompileCodeCache::new(file_path.clone(), 1234); + assert!(code_cache.enabled()); + let result1 = code_cache + .get_sync(&url1, CodeCacheType::EsModule, 0) + .unwrap(); + assert!(code_cache.enabled()); + let result2 = code_cache + .get_sync(&url2, CodeCacheType::EsModule, 1) + .unwrap(); + assert!(!code_cache.enabled()); // no longer enabled + assert_eq!(result1, vec![1, 2, 3]); + assert_eq!(result2, vec![2, 1, 3]); + } + + // new cache key first run + { + let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321); + assert!(code_cache + .get_sync(&url1, CodeCacheType::EsModule, 0) + .is_none()); + assert!(code_cache + .get_sync(&url2, CodeCacheType::EsModule, 1) + .is_none()); + code_cache.set_sync(url1.clone(), CodeCacheType::EsModule, 0, &[2, 2, 3]); + code_cache.set_sync(url2.clone(), CodeCacheType::EsModule, 1, &[3, 2, 3]); + } + // new cache key second run + { + let code_cache = DenoCompileCodeCache::new(file_path.clone(), 54321); + let result1 = code_cache + .get_sync(&url1, CodeCacheType::EsModule, 0) + .unwrap(); + assert_eq!(result1, vec![2, 2, 3]); + assert!(code_cache + .get_sync(&url2, CodeCacheType::EsModule, 5) // different hash will cause none + .is_none()); + } + } +} diff --git a/cli/standalone/file_system.rs b/cli/standalone/file_system.rs index 314444630..712c6ee91 100644 --- a/cli/standalone/file_system.rs +++ b/cli/standalone/file_system.rs @@ -22,8 +22,8 @@ use super::virtual_fs::FileBackedVfs; pub struct DenoCompileFileSystem(Arc<FileBackedVfs>); impl DenoCompileFileSystem { - pub fn new(vfs: FileBackedVfs) -> Self { - Self(Arc::new(vfs)) + pub fn new(vfs: Arc<FileBackedVfs>) -> Self { + Self(vfs) } fn error_if_in_vfs(&self, path: &Path) -> FsResult<()> { diff --git a/cli/standalone/mod.rs b/cli/standalone/mod.rs index 258de0dad..b9f0b1d5b 100644 --- a/cli/standalone/mod.rs +++ b/cli/standalone/mod.rs @@ -5,6 +5,9 @@ #![allow(dead_code)] #![allow(unused_imports)] +use binary::StandaloneData; +use binary::StandaloneModules; +use code_cache::DenoCompileCodeCache; use deno_ast::MediaType; use deno_cache_dir::npm::NpmCacheDir; use deno_config::workspace::MappedResolution; @@ -15,8 +18,10 @@ use deno_core::anyhow::Context; use deno_core::error::generic_error; use deno_core::error::type_error; use deno_core::error::AnyError; +use deno_core::futures::future::LocalBoxFuture; use deno_core::futures::FutureExt; use deno_core::v8_set_flags; +use deno_core::FastString; use deno_core::FeatureChecker; use deno_core::ModuleLoader; use deno_core::ModuleSourceCode; @@ -24,11 +29,15 @@ use deno_core::ModuleSpecifier; use deno_core::ModuleType; use deno_core::RequestedModuleType; use deno_core::ResolutionKind; +use deno_core::SourceCodeCacheInfo; use deno_npm::npm_rc::ResolvedNpmRc; use deno_package_json::PackageJsonDepValue; +use deno_resolver::npm::NpmReqResolverOptions; use deno_runtime::deno_fs; use deno_runtime::deno_node::create_host_defined_options; +use deno_runtime::deno_node::NodeRequireLoader; use deno_runtime::deno_node::NodeResolver; +use deno_runtime::deno_node::PackageJsonResolver; use deno_runtime::deno_permissions::Permissions; use deno_runtime::deno_permissions::PermissionsContainer; use deno_runtime::deno_tls::rustls::RootCertStore; @@ -38,10 +47,12 @@ use deno_runtime::permissions::RuntimePermissionDescriptorParser; use deno_runtime::WorkerExecutionMode; use deno_runtime::WorkerLogLevel; use deno_semver::npm::NpmPackageReqReference; -use eszip::EszipRelativeFileBaseUrl; use import_map::parse_from_json; use node_resolver::analyze::NodeCodeTranslator; +use node_resolver::errors::ClosestPkgJsonError; +use node_resolver::NodeModuleKind; use node_resolver::NodeResolutionMode; +use serialization::DenoCompileModuleSource; use std::borrow::Cow; use std::rc::Rc; use std::sync::Arc; @@ -54,86 +65,102 @@ use crate::args::CacheSetting; use crate::args::NpmInstallDepsProvider; use crate::args::StorageKeyResolver; use crate::cache::Caches; +use crate::cache::DenoCacheEnvFsAdapter; use crate::cache::DenoDirProvider; +use crate::cache::FastInsecureHasher; use crate::cache::NodeAnalysisCache; use crate::cache::RealDenoCacheEnv; use crate::http_util::HttpClientProvider; use crate::node::CliCjsCodeAnalyzer; +use crate::node::CliNodeCodeTranslator; use crate::npm::create_cli_npm_resolver; +use crate::npm::create_in_npm_pkg_checker; use crate::npm::CliByonmNpmResolverCreateOptions; +use crate::npm::CliManagedInNpmPkgCheckerCreateOptions; +use crate::npm::CliManagedNpmResolverCreateOptions; +use crate::npm::CliNpmResolver; use crate::npm::CliNpmResolverCreateOptions; -use crate::npm::CliNpmResolverManagedCreateOptions; use crate::npm::CliNpmResolverManagedSnapshotOption; -use crate::resolver::CjsResolutionStore; +use crate::npm::CreateInNpmPkgCheckerOptions; +use crate::resolver::CjsTracker; use crate::resolver::CliDenoResolverFs; -use crate::resolver::CliNodeResolver; +use crate::resolver::CliNpmReqResolver; +use crate::resolver::IsCjsResolverOptions; use crate::resolver::NpmModuleLoader; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::ProgressBarStyle; use crate::util::v8::construct_v8_flags; +use crate::worker::CliCodeCache; use crate::worker::CliMainWorkerFactory; use crate::worker::CliMainWorkerOptions; -use crate::worker::ModuleLoaderAndSourceMapGetter; +use crate::worker::CreateModuleLoaderResult; use crate::worker::ModuleLoaderFactory; pub mod binary; +mod code_cache; mod file_system; +mod serialization; mod virtual_fs; pub use binary::extract_standalone; pub use binary::is_standalone_binary; pub use binary::DenoCompileBinaryWriter; -use self::binary::load_npm_vfs; use self::binary::Metadata; use self::file_system::DenoCompileFileSystem; -struct WorkspaceEszipModule { - specifier: ModuleSpecifier, - inner: eszip::Module, -} - -struct WorkspaceEszip { - eszip: eszip::EszipV2, - root_dir_url: Arc<ModuleSpecifier>, +struct SharedModuleLoaderState { + cjs_tracker: Arc<CjsTracker>, + fs: Arc<dyn deno_fs::FileSystem>, + modules: StandaloneModules, + node_code_translator: Arc<CliNodeCodeTranslator>, + node_resolver: Arc<NodeResolver>, + npm_module_loader: Arc<NpmModuleLoader>, + npm_req_resolver: Arc<CliNpmReqResolver>, + npm_resolver: Arc<dyn CliNpmResolver>, + workspace_resolver: WorkspaceResolver, + code_cache: Option<Arc<dyn CliCodeCache>>, } -impl WorkspaceEszip { - pub fn get_module( +impl SharedModuleLoaderState { + fn get_code_cache( &self, specifier: &ModuleSpecifier, - ) -> Option<WorkspaceEszipModule> { - if specifier.scheme() == "file" { - let specifier_key = EszipRelativeFileBaseUrl::new(&self.root_dir_url) - .specifier_key(specifier); - let module = self.eszip.get_module(&specifier_key)?; - let specifier = self.root_dir_url.join(&module.specifier).unwrap(); - Some(WorkspaceEszipModule { - specifier, - inner: module, - }) - } else { - let module = self.eszip.get_module(specifier.as_str())?; - Some(WorkspaceEszipModule { - specifier: ModuleSpecifier::parse(&module.specifier).unwrap(), - inner: module, - }) + source: &[u8], + ) -> Option<SourceCodeCacheInfo> { + let Some(code_cache) = &self.code_cache else { + return None; + }; + if !code_cache.enabled() { + return None; } + // deno version is already included in the root cache key + let hash = FastInsecureHasher::new_without_deno_version() + .write_hashable(source) + .finish(); + let data = code_cache.get_sync( + specifier, + deno_runtime::code_cache::CodeCacheType::EsModule, + hash, + ); + Some(SourceCodeCacheInfo { + hash, + data: data.map(Cow::Owned), + }) } } -struct SharedModuleLoaderState { - eszip: WorkspaceEszip, - workspace_resolver: WorkspaceResolver, - node_resolver: Arc<CliNodeResolver>, - npm_module_loader: Arc<NpmModuleLoader>, -} - #[derive(Clone)] struct EmbeddedModuleLoader { shared: Arc<SharedModuleLoaderState>, } +impl std::fmt::Debug for EmbeddedModuleLoader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("EmbeddedModuleLoader").finish() + } +} + pub const MODULE_NOT_FOUND: &str = "Module not found"; pub const UNSUPPORTED_SCHEME: &str = "Unsupported scheme"; @@ -158,13 +185,27 @@ impl ModuleLoader for EmbeddedModuleLoader { type_error(format!("Referrer uses invalid specifier: {}", err)) })? }; + let referrer_kind = if self + .shared + .cjs_tracker + .is_maybe_cjs(&referrer, MediaType::from_specifier(&referrer))? + { + NodeModuleKind::Cjs + } else { + NodeModuleKind::Esm + }; if self.shared.node_resolver.in_npm_package(&referrer) { return Ok( self .shared .node_resolver - .resolve(raw_specifier, &referrer, NodeResolutionMode::Execution)? + .resolve( + raw_specifier, + &referrer, + referrer_kind, + NodeResolutionMode::Execution, + )? .into_url(), ); } @@ -186,13 +227,13 @@ impl ModuleLoader for EmbeddedModuleLoader { self .shared .node_resolver - .resolve_package_sub_path_from_deno_module( + .resolve_package_subpath_from_deno_module( pkg_json.dir_path(), sub_path.as_deref(), Some(&referrer), + referrer_kind, NodeResolutionMode::Execution, - )? - .into_url(), + )?, ), Ok(MappedResolution::PackageJson { dep_result, @@ -202,14 +243,15 @@ impl ModuleLoader for EmbeddedModuleLoader { }) => match dep_result.as_ref().map_err(|e| AnyError::from(e.clone()))? { PackageJsonDepValue::Req(req) => self .shared - .node_resolver + .npm_req_resolver .resolve_req_with_sub_path( req, sub_path.as_deref(), &referrer, + referrer_kind, NodeResolutionMode::Execution, ) - .map(|res| res.into_url()), + .map_err(AnyError::from), PackageJsonDepValue::Workspace(version_req) => { let pkg_folder = self .shared @@ -222,13 +264,13 @@ impl ModuleLoader for EmbeddedModuleLoader { self .shared .node_resolver - .resolve_package_sub_path_from_deno_module( + .resolve_package_subpath_from_deno_module( pkg_folder, sub_path.as_deref(), Some(&referrer), + referrer_kind, NodeResolutionMode::Execution, - )? - .into_url(), + )?, ) } }, @@ -237,20 +279,19 @@ impl ModuleLoader for EmbeddedModuleLoader { if let Ok(reference) = NpmPackageReqReference::from_specifier(&specifier) { - return self - .shared - .node_resolver - .resolve_req_reference( - &reference, - &referrer, - NodeResolutionMode::Execution, - ) - .map(|res| res.into_url()); + return Ok(self.shared.npm_req_resolver.resolve_req_reference( + &reference, + &referrer, + referrer_kind, + NodeResolutionMode::Execution, + )?); } if specifier.scheme() == "jsr" { - if let Some(module) = self.shared.eszip.get_module(&specifier) { - return Ok(module.specifier); + if let Some(specifier) = + self.shared.modules.resolve_specifier(&specifier)? + { + return Ok(specifier.clone()); } } @@ -258,16 +299,17 @@ impl ModuleLoader for EmbeddedModuleLoader { self .shared .node_resolver - .handle_if_in_node_modules(&specifier)? + .handle_if_in_node_modules(&specifier) .unwrap_or(specifier), ) } Err(err) if err.is_unmapped_bare_specifier() && referrer.scheme() == "file" => { - let maybe_res = self.shared.node_resolver.resolve_if_for_npm_pkg( + let maybe_res = self.shared.npm_req_resolver.resolve_if_for_npm_pkg( raw_specifier, &referrer, + referrer_kind, NodeResolutionMode::Execution, )?; if let Some(res) = maybe_res { @@ -322,14 +364,19 @@ impl ModuleLoader for EmbeddedModuleLoader { } if self.shared.node_resolver.in_npm_package(original_specifier) { - let npm_module_loader = self.shared.npm_module_loader.clone(); + let shared = self.shared.clone(); let original_specifier = original_specifier.clone(); let maybe_referrer = maybe_referrer.cloned(); return deno_core::ModuleLoadResponse::Async( async move { - let code_source = npm_module_loader + let code_source = shared + .npm_module_loader .load(&original_specifier, maybe_referrer.as_ref()) .await?; + let code_cache_entry = shared.get_code_cache( + &code_source.found_url, + code_source.code.as_bytes(), + ); Ok(deno_core::ModuleSource::new_with_redirect( match code_source.media_type { MediaType::Json => ModuleType::Json, @@ -338,89 +385,177 @@ impl ModuleLoader for EmbeddedModuleLoader { code_source.code, &original_specifier, &code_source.found_url, - None, + code_cache_entry, )) } .boxed_local(), ); } - let Some(module) = self.shared.eszip.get_module(original_specifier) else { - return deno_core::ModuleLoadResponse::Sync(Err(type_error(format!( - "{MODULE_NOT_FOUND}: {}", - original_specifier - )))); - }; - let original_specifier = original_specifier.clone(); - - deno_core::ModuleLoadResponse::Async( - async move { - let code = module.inner.source().await.ok_or_else(|| { - type_error(format!("Module not found: {}", original_specifier)) - })?; - let code = arc_u8_to_arc_str(code) - .map_err(|_| type_error("Module source is not utf-8"))?; - Ok(deno_core::ModuleSource::new_with_redirect( - match module.inner.kind { - eszip::ModuleKind::JavaScript => ModuleType::JavaScript, - eszip::ModuleKind::Json => ModuleType::Json, - eszip::ModuleKind::Jsonc => { - return Err(type_error("jsonc modules not supported")) - } - eszip::ModuleKind::OpaqueData => { - unreachable!(); + match self.shared.modules.read(original_specifier) { + Ok(Some(module)) => { + let media_type = module.media_type; + let (module_specifier, module_type, module_source) = + module.into_parts(); + let is_maybe_cjs = match self + .shared + .cjs_tracker + .is_maybe_cjs(original_specifier, media_type) + { + Ok(is_maybe_cjs) => is_maybe_cjs, + Err(err) => { + return deno_core::ModuleLoadResponse::Sync(Err(type_error( + format!("{:?}", err), + ))); + } + }; + if is_maybe_cjs { + let original_specifier = original_specifier.clone(); + let module_specifier = module_specifier.clone(); + let shared = self.shared.clone(); + deno_core::ModuleLoadResponse::Async( + async move { + let source = match module_source { + DenoCompileModuleSource::String(string) => { + Cow::Borrowed(string) + } + DenoCompileModuleSource::Bytes(module_code_bytes) => { + match module_code_bytes { + Cow::Owned(bytes) => Cow::Owned( + crate::util::text_encoding::from_utf8_lossy_owned(bytes), + ), + Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes), + } + } + }; + let source = shared + .node_code_translator + .translate_cjs_to_esm(&module_specifier, Some(source)) + .await?; + let module_source = match source { + Cow::Owned(source) => ModuleSourceCode::String(source.into()), + Cow::Borrowed(source) => { + ModuleSourceCode::String(FastString::from_static(source)) + } + }; + let code_cache_entry = shared + .get_code_cache(&module_specifier, module_source.as_bytes()); + Ok(deno_core::ModuleSource::new_with_redirect( + module_type, + module_source, + &original_specifier, + &module_specifier, + code_cache_entry, + )) } - }, - ModuleSourceCode::String(code.into()), - &original_specifier, - &module.specifier, - None, - )) + .boxed_local(), + ) + } else { + let module_source = module_source.into_for_v8(); + let code_cache_entry = self + .shared + .get_code_cache(module_specifier, module_source.as_bytes()); + deno_core::ModuleLoadResponse::Sync(Ok( + deno_core::ModuleSource::new_with_redirect( + module_type, + module_source, + original_specifier, + module_specifier, + code_cache_entry, + ), + )) + } } - .boxed_local(), - ) + Ok(None) => deno_core::ModuleLoadResponse::Sync(Err(type_error( + format!("{MODULE_NOT_FOUND}: {}", original_specifier), + ))), + Err(err) => deno_core::ModuleLoadResponse::Sync(Err(type_error( + format!("{:?}", err), + ))), + } + } + + fn code_cache_ready( + &self, + specifier: ModuleSpecifier, + source_hash: u64, + code_cache_data: &[u8], + ) -> LocalBoxFuture<'static, ()> { + if let Some(code_cache) = &self.shared.code_cache { + code_cache.set_sync( + specifier, + deno_runtime::code_cache::CodeCacheType::EsModule, + source_hash, + code_cache_data, + ); + } + std::future::ready(()).boxed_local() } } -fn arc_u8_to_arc_str( - arc_u8: Arc<[u8]>, -) -> Result<Arc<str>, std::str::Utf8Error> { - // Check that the string is valid UTF-8. - std::str::from_utf8(&arc_u8)?; - // SAFETY: the string is valid UTF-8, and the layout Arc<[u8]> is the same as - // Arc<str>. This is proven by the From<Arc<str>> impl for Arc<[u8]> from the - // standard library. - Ok(unsafe { - std::mem::transmute::<std::sync::Arc<[u8]>, std::sync::Arc<str>>(arc_u8) - }) +impl NodeRequireLoader for EmbeddedModuleLoader { + fn ensure_read_permission<'a>( + &self, + permissions: &mut dyn deno_runtime::deno_node::NodePermissions, + path: &'a std::path::Path, + ) -> Result<Cow<'a, std::path::Path>, AnyError> { + if self.shared.modules.has_file(path) { + // allow reading if the file is in the snapshot + return Ok(Cow::Borrowed(path)); + } + + self + .shared + .npm_resolver + .ensure_read_permission(permissions, path) + } + + fn load_text_file_lossy( + &self, + path: &std::path::Path, + ) -> Result<String, AnyError> { + Ok(self.shared.fs.read_text_file_lossy_sync(path, None)?) + } + + fn is_maybe_cjs( + &self, + specifier: &ModuleSpecifier, + ) -> Result<bool, ClosestPkgJsonError> { + let media_type = MediaType::from_specifier(specifier); + self.shared.cjs_tracker.is_maybe_cjs(specifier, media_type) + } } struct StandaloneModuleLoaderFactory { shared: Arc<SharedModuleLoaderState>, } +impl StandaloneModuleLoaderFactory { + pub fn create_result(&self) -> CreateModuleLoaderResult { + let loader = Rc::new(EmbeddedModuleLoader { + shared: self.shared.clone(), + }); + CreateModuleLoaderResult { + module_loader: loader.clone(), + node_require_loader: loader, + } + } +} + impl ModuleLoaderFactory for StandaloneModuleLoaderFactory { fn create_for_main( &self, _root_permissions: PermissionsContainer, - ) -> ModuleLoaderAndSourceMapGetter { - ModuleLoaderAndSourceMapGetter { - module_loader: Rc::new(EmbeddedModuleLoader { - shared: self.shared.clone(), - }), - } + ) -> CreateModuleLoaderResult { + self.create_result() } fn create_for_worker( &self, _parent_permissions: PermissionsContainer, _permissions: PermissionsContainer, - ) -> ModuleLoaderAndSourceMapGetter { - ModuleLoaderAndSourceMapGetter { - module_loader: Rc::new(EmbeddedModuleLoader { - shared: self.shared.clone(), - }), - } + ) -> CreateModuleLoaderResult { + self.create_result() } } @@ -439,13 +574,15 @@ impl RootCertStoreProvider for StandaloneRootCertStoreProvider { } } -pub async fn run( - mut eszip: eszip::EszipV2, - metadata: Metadata, -) -> Result<i32, AnyError> { - let current_exe_path = std::env::current_exe().unwrap(); - let current_exe_name = - current_exe_path.file_name().unwrap().to_string_lossy(); +pub async fn run(data: StandaloneData) -> Result<i32, AnyError> { + let StandaloneData { + fs, + metadata, + modules, + npm_snapshot, + root_path, + vfs, + } = data; let deno_dir_provider = Arc::new(DenoDirProvider::new(None)); let root_cert_store_provider = Arc::new(StandaloneRootCertStoreProvider { ca_stores: metadata.ca_stores, @@ -459,44 +596,50 @@ pub async fn run( )); // use a dummy npm registry url let npm_registry_url = ModuleSpecifier::parse("https://localhost/").unwrap(); - let root_path = - std::env::temp_dir().join(format!("deno-compile-{}", current_exe_name)); let root_dir_url = Arc::new(ModuleSpecifier::from_directory_path(&root_path).unwrap()); let main_module = root_dir_url.join(&metadata.entrypoint_key).unwrap(); - let root_node_modules_path = root_path.join("node_modules"); - let npm_cache_dir = NpmCacheDir::new( - &RealDenoCacheEnv, - root_node_modules_path.clone(), - vec![npm_registry_url.clone()], - ); - let npm_global_cache_dir = npm_cache_dir.get_cache_location(); + let npm_global_cache_dir = root_path.join(".deno_compile_node_modules"); let cache_setting = CacheSetting::Only; - let (fs, npm_resolver, maybe_vfs_root) = match metadata.node_modules { + let pkg_json_resolver = Arc::new(PackageJsonResolver::new( + deno_runtime::deno_node::DenoFsNodeResolverEnv::new(fs.clone()), + )); + let (in_npm_pkg_checker, npm_resolver) = match metadata.node_modules { Some(binary::NodeModules::Managed { node_modules_dir }) => { - // this will always have a snapshot - let snapshot = eszip.take_npm_snapshot().unwrap(); - let vfs_root_dir_path = if node_modules_dir.is_some() { - root_path.clone() - } else { - npm_cache_dir.root_dir().to_owned() - }; - let vfs = load_npm_vfs(vfs_root_dir_path.clone()) - .context("Failed to load npm vfs.")?; + // create an npmrc that uses the fake npm_registry_url to resolve packages + let npmrc = Arc::new(ResolvedNpmRc { + default_config: deno_npm::npm_rc::RegistryConfigWithUrl { + registry_url: npm_registry_url.clone(), + config: Default::default(), + }, + scopes: Default::default(), + registry_configs: Default::default(), + }); + let npm_cache_dir = Arc::new(NpmCacheDir::new( + &DenoCacheEnvFsAdapter(fs.as_ref()), + npm_global_cache_dir, + npmrc.get_all_known_registries_urls(), + )); + let snapshot = npm_snapshot.unwrap(); let maybe_node_modules_path = node_modules_dir - .map(|node_modules_dir| vfs_root_dir_path.join(node_modules_dir)); - let fs = Arc::new(DenoCompileFileSystem::new(vfs)) - as Arc<dyn deno_fs::FileSystem>; + .map(|node_modules_dir| root_path.join(node_modules_dir)); + let in_npm_pkg_checker = + create_in_npm_pkg_checker(CreateInNpmPkgCheckerOptions::Managed( + CliManagedInNpmPkgCheckerCreateOptions { + root_cache_dir_url: npm_cache_dir.root_dir_url(), + maybe_node_modules_path: maybe_node_modules_path.as_deref(), + }, + )); let npm_resolver = create_cli_npm_resolver(CliNpmResolverCreateOptions::Managed( - CliNpmResolverManagedCreateOptions { + CliManagedNpmResolverCreateOptions { snapshot: CliNpmResolverManagedSnapshotOption::Specified(Some( snapshot, )), maybe_lockfile: None, fs: fs.clone(), http_client_provider: http_client_provider.clone(), - npm_global_cache_dir, + npm_cache_dir, cache_setting, text_only_progress_bar: progress_bar, maybe_node_modules_path, @@ -505,50 +648,54 @@ pub async fn run( // this is only used for installing packages, which isn't necessary with deno compile NpmInstallDepsProvider::empty(), ), - // create an npmrc that uses the fake npm_registry_url to resolve packages - npmrc: Arc::new(ResolvedNpmRc { - default_config: deno_npm::npm_rc::RegistryConfigWithUrl { - registry_url: npm_registry_url.clone(), - config: Default::default(), - }, - scopes: Default::default(), - registry_configs: Default::default(), - }), + npmrc, lifecycle_scripts: Default::default(), }, )) .await?; - (fs, npm_resolver, Some(vfs_root_dir_path)) + (in_npm_pkg_checker, npm_resolver) } Some(binary::NodeModules::Byonm { root_node_modules_dir, }) => { - let vfs_root_dir_path = root_path.clone(); - let vfs = load_npm_vfs(vfs_root_dir_path.clone()) - .context("Failed to load vfs.")?; let root_node_modules_dir = root_node_modules_dir.map(|p| vfs.root().join(p)); - let fs = Arc::new(DenoCompileFileSystem::new(vfs)) - as Arc<dyn deno_fs::FileSystem>; + let in_npm_pkg_checker = + create_in_npm_pkg_checker(CreateInNpmPkgCheckerOptions::Byonm); let npm_resolver = create_cli_npm_resolver( CliNpmResolverCreateOptions::Byonm(CliByonmNpmResolverCreateOptions { fs: CliDenoResolverFs(fs.clone()), + pkg_json_resolver: pkg_json_resolver.clone(), root_node_modules_dir, }), ) .await?; - (fs, npm_resolver, Some(vfs_root_dir_path)) + (in_npm_pkg_checker, npm_resolver) } None => { - let fs = Arc::new(deno_fs::RealFs) as Arc<dyn deno_fs::FileSystem>; + // Packages from different registries are already inlined in the binary, + // so no need to create actual `.npmrc` configuration. + let npmrc = create_default_npmrc(); + let npm_cache_dir = Arc::new(NpmCacheDir::new( + &DenoCacheEnvFsAdapter(fs.as_ref()), + npm_global_cache_dir, + npmrc.get_all_known_registries_urls(), + )); + let in_npm_pkg_checker = + create_in_npm_pkg_checker(CreateInNpmPkgCheckerOptions::Managed( + CliManagedInNpmPkgCheckerCreateOptions { + root_cache_dir_url: npm_cache_dir.root_dir_url(), + maybe_node_modules_path: None, + }, + )); let npm_resolver = create_cli_npm_resolver(CliNpmResolverCreateOptions::Managed( - CliNpmResolverManagedCreateOptions { + CliManagedNpmResolverCreateOptions { snapshot: CliNpmResolverManagedSnapshotOption::Specified(None), maybe_lockfile: None, fs: fs.clone(), http_client_provider: http_client_provider.clone(), - npm_global_cache_dir, + npm_cache_dir, cache_setting, text_only_progress_bar: progress_bar, maybe_node_modules_path: None, @@ -557,41 +704,53 @@ pub async fn run( // this is only used for installing packages, which isn't necessary with deno compile NpmInstallDepsProvider::empty(), ), - // Packages from different registries are already inlined in the ESZip, - // so no need to create actual `.npmrc` configuration. npmrc: create_default_npmrc(), lifecycle_scripts: Default::default(), }, )) .await?; - (fs, npm_resolver, None) + (in_npm_pkg_checker, npm_resolver) } }; let has_node_modules_dir = npm_resolver.root_node_modules_path().is_some(); let node_resolver = Arc::new(NodeResolver::new( deno_runtime::deno_node::DenoFsNodeResolverEnv::new(fs.clone()), - npm_resolver.clone().into_npm_resolver(), + in_npm_pkg_checker.clone(), + npm_resolver.clone().into_npm_pkg_folder_resolver(), + pkg_json_resolver.clone(), + )); + let cjs_tracker = Arc::new(CjsTracker::new( + in_npm_pkg_checker.clone(), + pkg_json_resolver.clone(), + IsCjsResolverOptions { + detect_cjs: !metadata.workspace_resolver.package_jsons.is_empty(), + is_node_main: false, + }, )); - let cjs_resolutions = Arc::new(CjsResolutionStore::default()); let cache_db = Caches::new(deno_dir_provider.clone()); let node_analysis_cache = NodeAnalysisCache::new(cache_db.node_analysis_db()); - let cli_node_resolver = Arc::new(CliNodeResolver::new( - cjs_resolutions.clone(), - fs.clone(), - node_resolver.clone(), - npm_resolver.clone(), - )); + let npm_req_resolver = + Arc::new(CliNpmReqResolver::new(NpmReqResolverOptions { + byonm_resolver: (npm_resolver.clone()).into_maybe_byonm(), + fs: CliDenoResolverFs(fs.clone()), + in_npm_pkg_checker: in_npm_pkg_checker.clone(), + node_resolver: node_resolver.clone(), + npm_req_resolver: npm_resolver.clone().into_npm_req_resolver(), + })); let cjs_esm_code_analyzer = CliCjsCodeAnalyzer::new( node_analysis_cache, + cjs_tracker.clone(), fs.clone(), - cli_node_resolver.clone(), + None, ); let node_code_translator = Arc::new(NodeCodeTranslator::new( cjs_esm_code_analyzer, deno_runtime::deno_node::DenoFsNodeResolverEnv::new(fs.clone()), + in_npm_pkg_checker, node_resolver.clone(), - npm_resolver.clone().into_npm_resolver(), + npm_resolver.clone().into_npm_pkg_folder_resolver(), + pkg_json_resolver.clone(), )); let workspace_resolver = { let import_map = match metadata.workspace_resolver.import_map { @@ -642,39 +801,52 @@ pub async fn run( metadata.workspace_resolver.pkg_json_resolution, ) }; + let code_cache = match metadata.code_cache_key { + Some(code_cache_key) => Some(Arc::new(DenoCompileCodeCache::new( + root_path.with_file_name(format!( + "{}.cache", + root_path.file_name().unwrap().to_string_lossy() + )), + code_cache_key, + )) as Arc<dyn CliCodeCache>), + None => { + log::debug!("Code cache disabled."); + None + } + }; let module_loader_factory = StandaloneModuleLoaderFactory { shared: Arc::new(SharedModuleLoaderState { - eszip: WorkspaceEszip { - eszip, - root_dir_url, - }, - workspace_resolver, - node_resolver: cli_node_resolver.clone(), + cjs_tracker: cjs_tracker.clone(), + fs: fs.clone(), + modules, + node_code_translator: node_code_translator.clone(), + node_resolver: node_resolver.clone(), npm_module_loader: Arc::new(NpmModuleLoader::new( - cjs_resolutions, - node_code_translator, + cjs_tracker.clone(), fs.clone(), - cli_node_resolver, + node_code_translator, )), + code_cache: code_cache.clone(), + npm_resolver: npm_resolver.clone(), + workspace_resolver, + npm_req_resolver, }), }; let permissions = { let mut permissions = metadata.permissions.to_options(/* cli_arg_urls */ &[]); - // if running with an npm vfs, grant read access to it - if let Some(vfs_root) = maybe_vfs_root { - match &mut permissions.allow_read { - Some(vec) if vec.is_empty() => { - // do nothing, already granted - } - Some(vec) => { - vec.push(vfs_root.to_string_lossy().to_string()); - } - None => { - permissions.allow_read = - Some(vec![vfs_root.to_string_lossy().to_string()]); - } + // grant read access to the vfs + match &mut permissions.allow_read { + Some(vec) if vec.is_empty() => { + // do nothing, already granted + } + Some(vec) => { + vec.push(root_path.to_string_lossy().to_string()); + } + None => { + permissions.allow_read = + Some(vec![root_path.to_string_lossy().to_string()]); } } @@ -696,8 +868,7 @@ pub async fn run( }); let worker_factory = CliMainWorkerFactory::new( Arc::new(BlobStore::default()), - // Code cache is not supported for standalone binary yet. - None, + code_cache, feature_checker, fs, None, @@ -706,6 +877,7 @@ pub async fn run( Box::new(module_loader_factory), node_resolver, npm_resolver, + pkg_json_resolver, root_cert_store_provider, permissions, StorageKeyResolver::empty(), @@ -721,7 +893,6 @@ pub async fn run( inspect_wait: false, strace_ops: None, is_inspecting: false, - is_npm_main: main_module.scheme() == "npm", skip_op_registration: true, location: metadata.location, argv0: NpmPackageReqReference::from_specifier(&main_module) @@ -739,6 +910,7 @@ pub async fn run( serve_port: None, serve_host: None, }, + metadata.otel_config, ); // Initialize v8 once from the main thread. diff --git a/cli/standalone/serialization.rs b/cli/standalone/serialization.rs new file mode 100644 index 000000000..a5eb649bf --- /dev/null +++ b/cli/standalone/serialization.rs @@ -0,0 +1,661 @@ +// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. + +use std::borrow::Cow; +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::io::Write; + +use deno_ast::MediaType; +use deno_core::anyhow::bail; +use deno_core::anyhow::Context; +use deno_core::error::AnyError; +use deno_core::serde_json; +use deno_core::url::Url; +use deno_core::FastString; +use deno_core::ModuleSourceCode; +use deno_core::ModuleType; +use deno_npm::resolution::SerializedNpmResolutionSnapshot; +use deno_npm::resolution::SerializedNpmResolutionSnapshotPackage; +use deno_npm::resolution::ValidSerializedNpmResolutionSnapshot; +use deno_npm::NpmPackageId; +use deno_semver::package::PackageReq; + +use crate::standalone::virtual_fs::VirtualDirectory; + +use super::binary::Metadata; +use super::virtual_fs::VfsBuilder; + +const MAGIC_BYTES: &[u8; 8] = b"d3n0l4nd"; + +/// Binary format: +/// * d3n0l4nd +/// * <metadata_len><metadata> +/// * <npm_snapshot_len><npm_snapshot> +/// * <remote_modules_len><remote_modules> +/// * <vfs_headers_len><vfs_headers> +/// * <vfs_file_data_len><vfs_file_data> +/// * d3n0l4nd +pub fn serialize_binary_data_section( + metadata: &Metadata, + npm_snapshot: Option<SerializedNpmResolutionSnapshot>, + remote_modules: &RemoteModulesStoreBuilder, + vfs: VfsBuilder, +) -> Result<Vec<u8>, AnyError> { + fn write_bytes_with_len(bytes: &mut Vec<u8>, data: &[u8]) { + bytes.extend_from_slice(&(data.len() as u64).to_le_bytes()); + bytes.extend_from_slice(data); + } + + let mut bytes = Vec::new(); + bytes.extend_from_slice(MAGIC_BYTES); + + // 1. Metadata + { + let metadata = serde_json::to_string(metadata)?; + write_bytes_with_len(&mut bytes, metadata.as_bytes()); + } + // 2. Npm snapshot + { + let npm_snapshot = + npm_snapshot.map(serialize_npm_snapshot).unwrap_or_default(); + write_bytes_with_len(&mut bytes, &npm_snapshot); + } + // 3. Remote modules + { + let update_index = bytes.len(); + bytes.extend_from_slice(&(0_u64).to_le_bytes()); + let start_index = bytes.len(); + remote_modules.write(&mut bytes)?; + let length = bytes.len() - start_index; + let length_bytes = (length as u64).to_le_bytes(); + bytes[update_index..update_index + length_bytes.len()] + .copy_from_slice(&length_bytes); + } + // 4. VFS + { + let (vfs, vfs_files) = vfs.into_dir_and_files(); + let vfs = serde_json::to_string(&vfs)?; + write_bytes_with_len(&mut bytes, vfs.as_bytes()); + let vfs_bytes_len = vfs_files.iter().map(|f| f.len() as u64).sum::<u64>(); + bytes.extend_from_slice(&vfs_bytes_len.to_le_bytes()); + for file in &vfs_files { + bytes.extend_from_slice(file); + } + } + + // write the magic bytes at the end so we can use it + // to make sure we've deserialized correctly + bytes.extend_from_slice(MAGIC_BYTES); + + Ok(bytes) +} + +pub struct DeserializedDataSection { + pub metadata: Metadata, + pub npm_snapshot: Option<ValidSerializedNpmResolutionSnapshot>, + pub remote_modules: RemoteModulesStore, + pub vfs_dir: VirtualDirectory, + pub vfs_files_data: &'static [u8], +} + +pub fn deserialize_binary_data_section( + data: &'static [u8], +) -> Result<Option<DeserializedDataSection>, AnyError> { + fn read_bytes_with_len(input: &[u8]) -> Result<(&[u8], &[u8]), AnyError> { + let (input, len) = read_u64(input)?; + let (input, data) = read_bytes(input, len as usize)?; + Ok((input, data)) + } + + fn read_magic_bytes(input: &[u8]) -> Result<(&[u8], bool), AnyError> { + if input.len() < MAGIC_BYTES.len() { + bail!("Unexpected end of data. Could not find magic bytes."); + } + let (magic_bytes, input) = input.split_at(MAGIC_BYTES.len()); + if magic_bytes != MAGIC_BYTES { + return Ok((input, false)); + } + Ok((input, true)) + } + + let (input, found) = read_magic_bytes(data)?; + if !found { + return Ok(None); + } + + // 1. Metadata + let (input, data) = read_bytes_with_len(input).context("reading metadata")?; + let metadata: Metadata = + serde_json::from_slice(data).context("deserializing metadata")?; + // 2. Npm snapshot + let (input, data) = + read_bytes_with_len(input).context("reading npm snapshot")?; + let npm_snapshot = if data.is_empty() { + None + } else { + Some(deserialize_npm_snapshot(data).context("deserializing npm snapshot")?) + }; + // 3. Remote modules + let (input, data) = + read_bytes_with_len(input).context("reading remote modules data")?; + let remote_modules = + RemoteModulesStore::build(data).context("deserializing remote modules")?; + // 4. VFS + let (input, data) = read_bytes_with_len(input).context("vfs")?; + let vfs_dir: VirtualDirectory = + serde_json::from_slice(data).context("deserializing vfs data")?; + let (input, vfs_files_data) = + read_bytes_with_len(input).context("reading vfs files data")?; + + // finally ensure we read the magic bytes at the end + let (_input, found) = read_magic_bytes(input)?; + if !found { + bail!("Could not find magic bytes at the end of the data."); + } + + Ok(Some(DeserializedDataSection { + metadata, + npm_snapshot, + remote_modules, + vfs_dir, + vfs_files_data, + })) +} + +#[derive(Default)] +pub struct RemoteModulesStoreBuilder { + specifiers: Vec<(String, u64)>, + data: Vec<(MediaType, Vec<u8>)>, + data_byte_len: u64, + redirects: Vec<(String, String)>, + redirects_len: u64, +} + +impl RemoteModulesStoreBuilder { + pub fn add(&mut self, specifier: &Url, media_type: MediaType, data: Vec<u8>) { + log::debug!("Adding '{}' ({})", specifier, media_type); + let specifier = specifier.to_string(); + self.specifiers.push((specifier, self.data_byte_len)); + self.data_byte_len += 1 + 8 + data.len() as u64; // media type (1 byte), data length (8 bytes), data + self.data.push((media_type, data)); + } + + pub fn add_redirects(&mut self, redirects: &BTreeMap<Url, Url>) { + self.redirects.reserve(redirects.len()); + for (from, to) in redirects { + log::debug!("Adding redirect '{}' -> '{}'", from, to); + let from = from.to_string(); + let to = to.to_string(); + self.redirects_len += (4 + from.len() + 4 + to.len()) as u64; + self.redirects.push((from, to)); + } + } + + fn write(&self, writer: &mut dyn Write) -> Result<(), AnyError> { + writer.write_all(&(self.specifiers.len() as u32).to_le_bytes())?; + writer.write_all(&(self.redirects.len() as u32).to_le_bytes())?; + for (specifier, offset) in &self.specifiers { + writer.write_all(&(specifier.len() as u32).to_le_bytes())?; + writer.write_all(specifier.as_bytes())?; + writer.write_all(&offset.to_le_bytes())?; + } + for (from, to) in &self.redirects { + writer.write_all(&(from.len() as u32).to_le_bytes())?; + writer.write_all(from.as_bytes())?; + writer.write_all(&(to.len() as u32).to_le_bytes())?; + writer.write_all(to.as_bytes())?; + } + for (media_type, data) in &self.data { + writer.write_all(&[serialize_media_type(*media_type)])?; + writer.write_all(&(data.len() as u64).to_le_bytes())?; + writer.write_all(data)?; + } + Ok(()) + } +} + +pub enum DenoCompileModuleSource { + String(&'static str), + Bytes(Cow<'static, [u8]>), +} + +impl DenoCompileModuleSource { + pub fn into_for_v8(self) -> ModuleSourceCode { + fn into_bytes(data: Cow<'static, [u8]>) -> ModuleSourceCode { + ModuleSourceCode::Bytes(match data { + Cow::Borrowed(d) => d.into(), + Cow::Owned(d) => d.into_boxed_slice().into(), + }) + } + + match self { + // todo(https://github.com/denoland/deno_core/pull/943): store whether + // the string is ascii or not ahead of time so we can avoid the is_ascii() + // check in FastString::from_static + Self::String(s) => ModuleSourceCode::String(FastString::from_static(s)), + Self::Bytes(b) => into_bytes(b), + } + } +} + +pub struct DenoCompileModuleData<'a> { + pub specifier: &'a Url, + pub media_type: MediaType, + pub data: Cow<'static, [u8]>, +} + +impl<'a> DenoCompileModuleData<'a> { + pub fn into_parts(self) -> (&'a Url, ModuleType, DenoCompileModuleSource) { + fn into_string_unsafe(data: Cow<'static, [u8]>) -> DenoCompileModuleSource { + match data { + Cow::Borrowed(d) => DenoCompileModuleSource::String( + // SAFETY: we know this is a valid utf8 string + unsafe { std::str::from_utf8_unchecked(d) }, + ), + Cow::Owned(d) => DenoCompileModuleSource::Bytes(Cow::Owned(d)), + } + } + + let (media_type, source) = match self.media_type { + MediaType::JavaScript + | MediaType::Jsx + | MediaType::Mjs + | MediaType::Cjs + | MediaType::TypeScript + | MediaType::Mts + | MediaType::Cts + | MediaType::Dts + | MediaType::Dmts + | MediaType::Dcts + | MediaType::Tsx => { + (ModuleType::JavaScript, into_string_unsafe(self.data)) + } + MediaType::Json => (ModuleType::Json, into_string_unsafe(self.data)), + MediaType::Wasm => { + (ModuleType::Wasm, DenoCompileModuleSource::Bytes(self.data)) + } + // just assume javascript if we made it here + MediaType::Css | MediaType::SourceMap | MediaType::Unknown => ( + ModuleType::JavaScript, + DenoCompileModuleSource::Bytes(self.data), + ), + }; + (self.specifier, media_type, source) + } +} + +enum RemoteModulesStoreSpecifierValue { + Data(usize), + Redirect(Url), +} + +pub struct RemoteModulesStore { + specifiers: HashMap<Url, RemoteModulesStoreSpecifierValue>, + files_data: &'static [u8], +} + +impl RemoteModulesStore { + fn build(data: &'static [u8]) -> Result<Self, AnyError> { + fn read_specifier(input: &[u8]) -> Result<(&[u8], (Url, u64)), AnyError> { + let (input, specifier) = read_string_lossy(input)?; + let specifier = Url::parse(&specifier)?; + let (input, offset) = read_u64(input)?; + Ok((input, (specifier, offset))) + } + + fn read_redirect(input: &[u8]) -> Result<(&[u8], (Url, Url)), AnyError> { + let (input, from) = read_string_lossy(input)?; + let from = Url::parse(&from)?; + let (input, to) = read_string_lossy(input)?; + let to = Url::parse(&to)?; + Ok((input, (from, to))) + } + + fn read_headers( + input: &[u8], + ) -> Result<(&[u8], HashMap<Url, RemoteModulesStoreSpecifierValue>), AnyError> + { + let (input, specifiers_len) = read_u32_as_usize(input)?; + let (mut input, redirects_len) = read_u32_as_usize(input)?; + let mut specifiers = + HashMap::with_capacity(specifiers_len + redirects_len); + for _ in 0..specifiers_len { + let (current_input, (specifier, offset)) = + read_specifier(input).context("reading specifier")?; + input = current_input; + specifiers.insert( + specifier, + RemoteModulesStoreSpecifierValue::Data(offset as usize), + ); + } + + for _ in 0..redirects_len { + let (current_input, (from, to)) = read_redirect(input)?; + input = current_input; + specifiers.insert(from, RemoteModulesStoreSpecifierValue::Redirect(to)); + } + + Ok((input, specifiers)) + } + + let (files_data, specifiers) = read_headers(data)?; + + Ok(Self { + specifiers, + files_data, + }) + } + + pub fn resolve_specifier<'a>( + &'a self, + specifier: &'a Url, + ) -> Result<Option<&'a Url>, AnyError> { + let mut count = 0; + let mut current = specifier; + loop { + if count > 10 { + bail!("Too many redirects resolving '{}'", specifier); + } + match self.specifiers.get(current) { + Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { + current = to; + count += 1; + } + Some(RemoteModulesStoreSpecifierValue::Data(_)) => { + return Ok(Some(current)); + } + None => { + return Ok(None); + } + } + } + } + + pub fn read<'a>( + &'a self, + original_specifier: &'a Url, + ) -> Result<Option<DenoCompileModuleData<'a>>, AnyError> { + let mut count = 0; + let mut specifier = original_specifier; + loop { + if count > 10 { + bail!("Too many redirects resolving '{}'", original_specifier); + } + match self.specifiers.get(specifier) { + Some(RemoteModulesStoreSpecifierValue::Redirect(to)) => { + specifier = to; + count += 1; + } + Some(RemoteModulesStoreSpecifierValue::Data(offset)) => { + let input = &self.files_data[*offset..]; + let (input, media_type_byte) = read_bytes(input, 1)?; + let media_type = deserialize_media_type(media_type_byte[0])?; + let (input, len) = read_u64(input)?; + let (_input, data) = read_bytes(input, len as usize)?; + return Ok(Some(DenoCompileModuleData { + specifier, + media_type, + data: Cow::Borrowed(data), + })); + } + None => { + return Ok(None); + } + } + } + } +} + +fn serialize_npm_snapshot( + mut snapshot: SerializedNpmResolutionSnapshot, +) -> Vec<u8> { + fn append_string(bytes: &mut Vec<u8>, string: &str) { + let len = string.len() as u32; + bytes.extend_from_slice(&len.to_le_bytes()); + bytes.extend_from_slice(string.as_bytes()); + } + + snapshot.packages.sort_by(|a, b| a.id.cmp(&b.id)); // determinism + let ids_to_stored_ids = snapshot + .packages + .iter() + .enumerate() + .map(|(i, pkg)| (&pkg.id, i as u32)) + .collect::<HashMap<_, _>>(); + + let mut root_packages: Vec<_> = snapshot.root_packages.iter().collect(); + root_packages.sort(); + let mut bytes = Vec::new(); + + bytes.extend_from_slice(&(snapshot.packages.len() as u32).to_le_bytes()); + for pkg in &snapshot.packages { + append_string(&mut bytes, &pkg.id.as_serialized()); + } + + bytes.extend_from_slice(&(root_packages.len() as u32).to_le_bytes()); + for (req, id) in root_packages { + append_string(&mut bytes, &req.to_string()); + let id = ids_to_stored_ids.get(&id).unwrap(); + bytes.extend_from_slice(&id.to_le_bytes()); + } + + for pkg in &snapshot.packages { + let deps_len = pkg.dependencies.len() as u32; + bytes.extend_from_slice(&deps_len.to_le_bytes()); + let mut deps: Vec<_> = pkg.dependencies.iter().collect(); + deps.sort(); + for (req, id) in deps { + append_string(&mut bytes, req); + let id = ids_to_stored_ids.get(&id).unwrap(); + bytes.extend_from_slice(&id.to_le_bytes()); + } + } + + bytes +} + +fn deserialize_npm_snapshot( + input: &[u8], +) -> Result<ValidSerializedNpmResolutionSnapshot, AnyError> { + fn parse_id(input: &[u8]) -> Result<(&[u8], NpmPackageId), AnyError> { + let (input, id) = read_string_lossy(input)?; + let id = NpmPackageId::from_serialized(&id)?; + Ok((input, id)) + } + + #[allow(clippy::needless_lifetimes)] // clippy bug + fn parse_root_package<'a>( + id_to_npm_id: &'a impl Fn(usize) -> Result<NpmPackageId, AnyError>, + ) -> impl Fn(&[u8]) -> Result<(&[u8], (PackageReq, NpmPackageId)), AnyError> + 'a + { + |input| { + let (input, req) = read_string_lossy(input)?; + let req = PackageReq::from_str(&req)?; + let (input, id) = read_u32_as_usize(input)?; + Ok((input, (req, id_to_npm_id(id)?))) + } + } + + #[allow(clippy::needless_lifetimes)] // clippy bug + fn parse_package_dep<'a>( + id_to_npm_id: &'a impl Fn(usize) -> Result<NpmPackageId, AnyError>, + ) -> impl Fn(&[u8]) -> Result<(&[u8], (String, NpmPackageId)), AnyError> + 'a + { + |input| { + let (input, req) = read_string_lossy(input)?; + let (input, id) = read_u32_as_usize(input)?; + Ok((input, (req.into_owned(), id_to_npm_id(id)?))) + } + } + + fn parse_package<'a>( + input: &'a [u8], + id: NpmPackageId, + id_to_npm_id: &impl Fn(usize) -> Result<NpmPackageId, AnyError>, + ) -> Result<(&'a [u8], SerializedNpmResolutionSnapshotPackage), AnyError> { + let (input, deps_len) = read_u32_as_usize(input)?; + let (input, dependencies) = + parse_hashmap_n_times(input, deps_len, parse_package_dep(id_to_npm_id))?; + Ok(( + input, + SerializedNpmResolutionSnapshotPackage { + id, + system: Default::default(), + dist: Default::default(), + dependencies, + optional_dependencies: Default::default(), + bin: None, + scripts: Default::default(), + deprecated: Default::default(), + }, + )) + } + + let (input, packages_len) = read_u32_as_usize(input)?; + + // get a hashmap of all the npm package ids to their serialized ids + let (input, data_ids_to_npm_ids) = + parse_vec_n_times(input, packages_len, parse_id) + .context("deserializing id")?; + let data_id_to_npm_id = |id: usize| { + data_ids_to_npm_ids + .get(id) + .cloned() + .ok_or_else(|| deno_core::anyhow::anyhow!("Invalid npm package id")) + }; + + let (input, root_packages_len) = read_u32_as_usize(input)?; + let (input, root_packages) = parse_hashmap_n_times( + input, + root_packages_len, + parse_root_package(&data_id_to_npm_id), + ) + .context("deserializing root package")?; + let (input, packages) = + parse_vec_n_times_with_index(input, packages_len, |input, index| { + parse_package(input, data_id_to_npm_id(index)?, &data_id_to_npm_id) + }) + .context("deserializing package")?; + + if !input.is_empty() { + bail!("Unexpected data left over"); + } + + Ok( + SerializedNpmResolutionSnapshot { + packages, + root_packages, + } + // this is ok because we have already verified that all the + // identifiers found in the snapshot are valid via the + // npm package id -> npm package id mapping + .into_valid_unsafe(), + ) +} + +fn serialize_media_type(media_type: MediaType) -> u8 { + match media_type { + MediaType::JavaScript => 0, + MediaType::Jsx => 1, + MediaType::Mjs => 2, + MediaType::Cjs => 3, + MediaType::TypeScript => 4, + MediaType::Mts => 5, + MediaType::Cts => 6, + MediaType::Dts => 7, + MediaType::Dmts => 8, + MediaType::Dcts => 9, + MediaType::Tsx => 10, + MediaType::Json => 11, + MediaType::Wasm => 12, + MediaType::Css => 13, + MediaType::SourceMap => 14, + MediaType::Unknown => 15, + } +} + +fn deserialize_media_type(value: u8) -> Result<MediaType, AnyError> { + match value { + 0 => Ok(MediaType::JavaScript), + 1 => Ok(MediaType::Jsx), + 2 => Ok(MediaType::Mjs), + 3 => Ok(MediaType::Cjs), + 4 => Ok(MediaType::TypeScript), + 5 => Ok(MediaType::Mts), + 6 => Ok(MediaType::Cts), + 7 => Ok(MediaType::Dts), + 8 => Ok(MediaType::Dmts), + 9 => Ok(MediaType::Dcts), + 10 => Ok(MediaType::Tsx), + 11 => Ok(MediaType::Json), + 12 => Ok(MediaType::Wasm), + 13 => Ok(MediaType::Css), + 14 => Ok(MediaType::SourceMap), + 15 => Ok(MediaType::Unknown), + _ => bail!("Unknown media type value: {}", value), + } +} + +fn parse_hashmap_n_times<TKey: std::cmp::Eq + std::hash::Hash, TValue>( + mut input: &[u8], + times: usize, + parse: impl Fn(&[u8]) -> Result<(&[u8], (TKey, TValue)), AnyError>, +) -> Result<(&[u8], HashMap<TKey, TValue>), AnyError> { + let mut results = HashMap::with_capacity(times); + for _ in 0..times { + let result = parse(input); + let (new_input, (key, value)) = result?; + results.insert(key, value); + input = new_input; + } + Ok((input, results)) +} + +fn parse_vec_n_times<TResult>( + input: &[u8], + times: usize, + parse: impl Fn(&[u8]) -> Result<(&[u8], TResult), AnyError>, +) -> Result<(&[u8], Vec<TResult>), AnyError> { + parse_vec_n_times_with_index(input, times, |input, _index| parse(input)) +} + +fn parse_vec_n_times_with_index<TResult>( + mut input: &[u8], + times: usize, + parse: impl Fn(&[u8], usize) -> Result<(&[u8], TResult), AnyError>, +) -> Result<(&[u8], Vec<TResult>), AnyError> { + let mut results = Vec::with_capacity(times); + for i in 0..times { + let result = parse(input, i); + let (new_input, result) = result?; + results.push(result); + input = new_input; + } + Ok((input, results)) +} + +fn read_bytes(input: &[u8], len: usize) -> Result<(&[u8], &[u8]), AnyError> { + if input.len() < len { + bail!("Unexpected end of data.",); + } + let (len_bytes, input) = input.split_at(len); + Ok((input, len_bytes)) +} + +fn read_string_lossy(input: &[u8]) -> Result<(&[u8], Cow<str>), AnyError> { + let (input, str_len) = read_u32_as_usize(input)?; + let (input, data_bytes) = read_bytes(input, str_len)?; + Ok((input, String::from_utf8_lossy(data_bytes))) +} + +fn read_u32_as_usize(input: &[u8]) -> Result<(&[u8], usize), AnyError> { + let (input, len_bytes) = read_bytes(input, 4)?; + let len = u32::from_le_bytes(len_bytes.try_into()?); + Ok((input, len as usize)) +} + +fn read_u64(input: &[u8]) -> Result<(&[u8], u64), AnyError> { + let (input, len_bytes) = read_bytes(input, 8)?; + let len = u64::from_le_bytes(len_bytes.try_into()?); + Ok((input, len)) +} diff --git a/cli/standalone/virtual_fs.rs b/cli/standalone/virtual_fs.rs index 53d045b62..26bb0db75 100644 --- a/cli/standalone/virtual_fs.rs +++ b/cli/standalone/virtual_fs.rs @@ -7,6 +7,7 @@ use std::fs::File; use std::io::Read; use std::io::Seek; use std::io::SeekFrom; +use std::ops::Range; use std::path::Path; use std::path::PathBuf; use std::rc::Rc; @@ -67,6 +68,26 @@ impl VfsBuilder { }) } + pub fn set_new_root_path( + &mut self, + root_path: PathBuf, + ) -> Result<(), AnyError> { + let root_path = canonicalize_path(&root_path)?; + self.root_path = root_path; + self.root_dir = VirtualDirectory { + name: self + .root_path + .file_stem() + .map(|s| s.to_string_lossy().into_owned()) + .unwrap_or("root".to_string()), + entries: vec![VfsEntry::Dir(VirtualDirectory { + name: std::mem::take(&mut self.root_dir.name), + entries: std::mem::take(&mut self.root_dir.entries), + })], + }; + Ok(()) + } + pub fn with_root_dir<R>( &mut self, with_root: impl FnOnce(&mut VirtualDirectory) -> R, @@ -119,7 +140,7 @@ impl VfsBuilder { // inline the symlink and make the target file let file_bytes = std::fs::read(&target) .with_context(|| format!("Reading {}", path.display()))?; - self.add_file(&path, file_bytes)?; + self.add_file_with_data_inner(&path, file_bytes)?; } else { log::warn!( "{} Symlink target is outside '{}'. Excluding symlink at '{}' with target '{}'.", @@ -191,16 +212,32 @@ impl VfsBuilder { self.add_file_at_path_not_symlink(&target_path) } - pub fn add_file_at_path_not_symlink( + fn add_file_at_path_not_symlink( &mut self, path: &Path, ) -> Result<(), AnyError> { let file_bytes = std::fs::read(path) .with_context(|| format!("Reading {}", path.display()))?; - self.add_file(path, file_bytes) + self.add_file_with_data_inner(path, file_bytes) } - fn add_file(&mut self, path: &Path, data: Vec<u8>) -> Result<(), AnyError> { + pub fn add_file_with_data( + &mut self, + path: &Path, + data: Vec<u8>, + ) -> Result<(), AnyError> { + let target_path = canonicalize_path(path)?; + if target_path != path { + self.add_symlink(path, &target_path)?; + } + self.add_file_with_data_inner(&target_path, data) + } + + fn add_file_with_data_inner( + &mut self, + path: &Path, + data: Vec<u8>, + ) -> Result<(), AnyError> { log::debug!("Adding file '{}'", path.display()); let checksum = util::checksum::gen(&[&data]); let offset = if let Some(offset) = self.file_offsets.get(&checksum) { @@ -249,8 +286,15 @@ impl VfsBuilder { path.display(), target.display() ); - let dest = self.path_relative_root(target)?; - if dest == self.path_relative_root(path)? { + let relative_target = self.path_relative_root(target)?; + let relative_path = match self.path_relative_root(path) { + Ok(path) => path, + Err(StripRootError { .. }) => { + // ignore if the original path is outside the root directory + return Ok(()); + } + }; + if relative_target == relative_path { // it's the same, ignore return Ok(()); } @@ -263,7 +307,7 @@ impl VfsBuilder { insert_index, VfsEntry::Symlink(VirtualSymlink { name: name.to_string(), - dest_parts: dest + dest_parts: relative_target .components() .map(|c| c.as_os_str().to_string_lossy().to_string()) .collect::<Vec<_>>(), @@ -306,6 +350,7 @@ impl<'a> VfsEntryRef<'a> { atime: None, birthtime: None, mtime: None, + ctime: None, blksize: 0, size: 0, dev: 0, @@ -328,6 +373,7 @@ impl<'a> VfsEntryRef<'a> { atime: None, birthtime: None, mtime: None, + ctime: None, blksize: 0, size: file.len, dev: 0, @@ -350,6 +396,7 @@ impl<'a> VfsEntryRef<'a> { atime: None, birthtime: None, mtime: None, + ctime: None, blksize: 0, size: 0, dev: 0, @@ -751,14 +798,14 @@ impl deno_io::fs::File for FileBackedVfsFile { #[derive(Debug)] pub struct FileBackedVfs { - file: Mutex<Vec<u8>>, + vfs_data: Cow<'static, [u8]>, fs_root: VfsRoot, } impl FileBackedVfs { - pub fn new(file: Vec<u8>, fs_root: VfsRoot) -> Self { + pub fn new(data: Cow<'static, [u8]>, fs_root: VfsRoot) -> Self { Self { - file: Mutex::new(file), + vfs_data: data, fs_root, } } @@ -827,10 +874,15 @@ impl FileBackedVfs { Ok(path) } - pub fn read_file_all(&self, file: &VirtualFile) -> std::io::Result<Vec<u8>> { - let mut buf = vec![0; file.len as usize]; - self.read_file(file, 0, &mut buf)?; - Ok(buf) + pub fn read_file_all( + &self, + file: &VirtualFile, + ) -> std::io::Result<Cow<'static, [u8]>> { + let read_range = self.get_read_range(file, 0, file.len)?; + match &self.vfs_data { + Cow::Borrowed(data) => Ok(Cow::Borrowed(&data[read_range])), + Cow::Owned(data) => Ok(Cow::Owned(data[read_range].to_vec())), + } } pub fn read_file( @@ -839,18 +891,27 @@ impl FileBackedVfs { pos: u64, buf: &mut [u8], ) -> std::io::Result<usize> { - let data = self.file.lock(); + let read_range = self.get_read_range(file, pos, buf.len() as u64)?; + buf.copy_from_slice(&self.vfs_data[read_range]); + Ok(buf.len()) + } + + fn get_read_range( + &self, + file: &VirtualFile, + pos: u64, + len: u64, + ) -> std::io::Result<Range<usize>> { + let data = &self.vfs_data; let start = self.fs_root.start_file_offset + file.offset + pos; - let end = start + buf.len() as u64; + let end = start + len; if end > data.len() as u64 { return Err(std::io::Error::new( std::io::ErrorKind::UnexpectedEof, "unexpected EOF", )); } - - buf.copy_from_slice(&data[start as usize..end as usize]); - Ok(buf.len()) + Ok(start as usize..end as usize) } pub fn dir_entry(&self, path: &Path) -> std::io::Result<&VirtualDirectory> { @@ -888,7 +949,7 @@ mod test { #[track_caller] fn read_file(vfs: &FileBackedVfs, path: &Path) -> String { let file = vfs.file_entry(path).unwrap(); - String::from_utf8(vfs.read_file_all(file).unwrap()).unwrap() + String::from_utf8(vfs.read_file_all(file).unwrap().into_owned()).unwrap() } #[test] @@ -901,20 +962,23 @@ mod test { let src_path = src_path.to_path_buf(); let mut builder = VfsBuilder::new(src_path.clone()).unwrap(); builder - .add_file(&src_path.join("a.txt"), "data".into()) + .add_file_with_data_inner(&src_path.join("a.txt"), "data".into()) .unwrap(); builder - .add_file(&src_path.join("b.txt"), "data".into()) + .add_file_with_data_inner(&src_path.join("b.txt"), "data".into()) .unwrap(); assert_eq!(builder.files.len(), 1); // because duplicate data builder - .add_file(&src_path.join("c.txt"), "c".into()) + .add_file_with_data_inner(&src_path.join("c.txt"), "c".into()) .unwrap(); builder - .add_file(&src_path.join("sub_dir").join("d.txt"), "d".into()) + .add_file_with_data_inner( + &src_path.join("sub_dir").join("d.txt"), + "d".into(), + ) .unwrap(); builder - .add_file(&src_path.join("e.txt"), "e".into()) + .add_file_with_data_inner(&src_path.join("e.txt"), "e".into()) .unwrap(); builder .add_symlink( @@ -1031,7 +1095,7 @@ mod test { ( dest_path.to_path_buf(), FileBackedVfs::new( - data, + Cow::Owned(data), VfsRoot { dir: root_dir, root_path: dest_path.to_path_buf(), @@ -1082,7 +1146,7 @@ mod test { let temp_path = temp_dir.path().canonicalize(); let mut builder = VfsBuilder::new(temp_path.to_path_buf()).unwrap(); builder - .add_file( + .add_file_with_data_inner( temp_path.join("a.txt").as_path(), "0123456789".to_string().into_bytes(), ) |