diff options
author | David Sherret <dsherret@users.noreply.github.com> | 2024-01-31 22:15:22 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-01 03:15:22 +0000 |
commit | 4b7c6049ef9d40394eb823859c82cbf8d293430d (patch) | |
tree | 61e6de7c69c9d00faeef0ff7e6c223224a53de9e /cli/file_fetcher.rs | |
parent | 830d096b66696ad9f4e67b3ed8460fb1ff7a9170 (diff) |
refactor: load bytes in deno_graph (#22212)
Upgrades deno_graph to 0.64 where deno_graph is now responsible for
turning bytes into a string. This is in preparation for Wasm modules.
Diffstat (limited to 'cli/file_fetcher.rs')
-rw-r--r-- | cli/file_fetcher.rs | 480 |
1 file changed, 109 insertions, 371 deletions
diff --git a/cli/file_fetcher.rs b/cli/file_fetcher.rs index bbcdd3f84..5a7ca2b84 100644 --- a/cli/file_fetcher.rs +++ b/cli/file_fetcher.rs @@ -12,10 +12,9 @@ use crate::http_util::HeadersMap; use crate::http_util::HttpClient; use crate::util::progress_bar::ProgressBar; use crate::util::progress_bar::UpdateGuard; -use crate::util::text_encoding; -use data_url::DataUrl; use deno_ast::MediaType; +use deno_core::anyhow::Context; use deno_core::error::custom_error; use deno_core::error::generic_error; use deno_core::error::uri_error; @@ -45,21 +44,60 @@ use std::time::SystemTime; pub const SUPPORTED_SCHEMES: [&str; 5] = ["data", "blob", "file", "http", "https"]; -/// A structure representing a source file. #[derive(Debug, Clone, Eq, PartialEq)] -pub struct File { - /// For remote files, if there was an `X-TypeScript-Type` header, the parsed - /// out value of that header. - pub maybe_types: Option<String>, - /// The resolved media type for the file. +pub struct TextDecodedFile { pub media_type: MediaType, - /// The source of the file as a string. - pub source: Arc<str>, /// The _final_ specifier for the file. The requested specifier and the final /// specifier maybe different for remote files that have been redirected. pub specifier: ModuleSpecifier, + /// The source of the file. + pub source: Arc<str>, +} +/// A structure representing a source file. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct File { + /// The _final_ specifier for the file. The requested specifier and the final + /// specifier maybe different for remote files that have been redirected. + pub specifier: ModuleSpecifier, pub maybe_headers: Option<HashMap<String, String>>, + /// The source of the file. 
+ pub source: Arc<[u8]>, +} + +impl File { + pub fn resolve_media_type_and_charset(&self) -> (MediaType, Option<&str>) { + deno_graph::source::resolve_media_type_and_charset_from_headers( + &self.specifier, + self.maybe_headers.as_ref(), + ) + } + + /// Decodes the source bytes into a string handling any encoding rules + /// for local vs remote files and dealing with the charset. + pub fn into_text_decoded(self) -> Result<TextDecodedFile, AnyError> { + // lots of borrow checker fighting here + let (media_type, maybe_charset) = + deno_graph::source::resolve_media_type_and_charset_from_headers( + &self.specifier, + self.maybe_headers.as_ref(), + ); + let specifier = self.specifier; + match deno_graph::source::decode_source( + &specifier, + self.source, + maybe_charset, + ) { + Ok(source) => Ok(TextDecodedFile { + media_type, + specifier, + source, + }), + Err(err) => { + Err(err).with_context(|| format!("Failed decoding \"{}\".", specifier)) + } + } + } } /// Simple struct implementing in-process caching to prevent multiple @@ -85,49 +123,14 @@ fn fetch_local(specifier: &ModuleSpecifier) -> Result<File, AnyError> { uri_error(format!("Invalid file path.\n Specifier: {specifier}")) })?; let bytes = fs::read(local)?; - let charset = text_encoding::detect_charset(&bytes).to_string(); - let source = get_source_from_bytes(bytes, Some(charset))?; - let media_type = MediaType::from_specifier(specifier); Ok(File { - maybe_types: None, - media_type, - source: source.into(), specifier: specifier.clone(), maybe_headers: None, + source: bytes.into(), }) } -/// Returns the decoded body and content-type of a provided -/// data URL. 
-pub fn get_source_from_data_url( - specifier: &ModuleSpecifier, -) -> Result<(String, String), AnyError> { - let data_url = DataUrl::process(specifier.as_str()) - .map_err(|e| uri_error(format!("{e:?}")))?; - let mime = data_url.mime_type(); - let charset = mime.get_parameter("charset").map(|v| v.to_string()); - let (bytes, _) = data_url - .decode_to_vec() - .map_err(|e| uri_error(format!("{e:?}")))?; - Ok((get_source_from_bytes(bytes, charset)?, format!("{mime}"))) -} - -/// Given a vector of bytes and optionally a charset, decode the bytes to a -/// string. -pub fn get_source_from_bytes( - bytes: Vec<u8>, - maybe_charset: Option<String>, -) -> Result<String, AnyError> { - let source = if let Some(charset) = maybe_charset { - text_encoding::convert_to_utf8(&bytes, &charset)?.to_string() - } else { - String::from_utf8(bytes)? - }; - - Ok(source) -} - /// Return a validated scheme for a given module specifier. fn get_validated_scheme( specifier: &ModuleSpecifier, @@ -142,27 +145,6 @@ fn get_validated_scheme( } } -/// Resolve a media type and optionally the charset from a module specifier and -/// the value of a content type header. -pub fn map_content_type( - specifier: &ModuleSpecifier, - maybe_content_type: Option<&String>, -) -> (MediaType, Option<String>) { - if let Some(content_type) = maybe_content_type { - let mut content_types = content_type.split(';'); - let content_type = content_types.next().unwrap(); - let media_type = MediaType::from_content_type(specifier, content_type); - let charset = content_types - .map(str::trim) - .find_map(|s| s.strip_prefix("charset=")) - .map(String::from); - - (media_type, charset) - } else { - (MediaType::from_specifier(specifier), None) - } -} - pub struct FetchOptions<'a> { pub specifier: &'a ModuleSpecifier, pub permissions: PermissionsContainer, @@ -215,34 +197,6 @@ impl FileFetcher { self.download_log_level = level; } - /// Creates a `File` structure for a remote file. 
- fn build_remote_file( - &self, - specifier: &ModuleSpecifier, - bytes: Vec<u8>, - headers: &HashMap<String, String>, - ) -> Result<File, AnyError> { - let maybe_content_type = headers.get("content-type"); - let (media_type, maybe_charset) = - map_content_type(specifier, maybe_content_type); - let source = get_source_from_bytes(bytes, maybe_charset)?; - let maybe_types = match media_type { - MediaType::JavaScript - | MediaType::Cjs - | MediaType::Mjs - | MediaType::Jsx => headers.get("x-typescript-types").cloned(), - _ => None, - }; - - Ok(File { - maybe_types, - media_type, - source: source.into(), - specifier: specifier.clone(), - maybe_headers: Some(headers.clone()), - }) - } - /// Fetch cached remote file. /// /// This is a recursive operation if source file has redirections. @@ -269,9 +223,12 @@ impl FileFetcher { let Some(bytes) = self.http_cache.read_file_bytes(&cache_key)? else { return Ok(None); }; - let file = self.build_remote_file(specifier, bytes, &headers)?; - Ok(Some(file)) + Ok(Some(File { + specifier: specifier.clone(), + maybe_headers: Some(headers), + source: Arc::from(bytes), + })) } /// Convert a data URL into a file, resulting in an error if the URL is @@ -281,16 +238,12 @@ impl FileFetcher { specifier: &ModuleSpecifier, ) -> Result<File, AnyError> { debug!("FileFetcher::fetch_data_url() - specifier: {}", specifier); - let (source, content_type) = get_source_from_data_url(specifier)?; - let (media_type, _) = map_content_type(specifier, Some(&content_type)); - let mut headers = HashMap::new(); - headers.insert("content-type".to_string(), content_type); + let data_url = deno_graph::source::RawDataUrl::parse(specifier)?; + let (bytes, headers) = data_url.into_bytes_and_headers(); Ok(File { - maybe_types: None, - media_type, - source: source.into(), specifier: specifier.clone(), maybe_headers: Some(headers), + source: Arc::from(bytes), }) } @@ -310,21 +263,14 @@ impl FileFetcher { ) })?; - let content_type = blob.media_type.clone(); let bytes = 
blob.read_all().await?; - - let (media_type, maybe_charset) = - map_content_type(specifier, Some(&content_type)); - let source = get_source_from_bytes(bytes, maybe_charset)?; - let mut headers = HashMap::new(); - headers.insert("content-type".to_string(), content_type); + let headers = + HashMap::from([("content-type".to_string(), blob.media_type.clone())]); Ok(File { - maybe_types: None, - media_type, - source: source.into(), specifier: specifier.clone(), maybe_headers: Some(headers), + source: Arc::from(bytes), }) } @@ -453,9 +399,11 @@ impl FileFetcher { file_fetcher .http_cache .set(&specifier, headers.clone(), &bytes)?; - let file = - file_fetcher.build_remote_file(&specifier, bytes, &headers)?; - Ok(file) + Ok(File { + specifier, + maybe_headers: Some(headers), + source: Arc::from(bytes), + }) } FetchOnceResult::RequestError(err) => { handle_request_or_server_error(&mut retried, &specifier, err) @@ -767,16 +715,6 @@ mod tests { (file_fetcher, temp_dir, blob_store) } - macro_rules! 
file_url { - ($path:expr) => { - if cfg!(target_os = "windows") { - concat!("file:///C:", $path) - } else { - concat!("file://", $path) - } - }; - } - async fn test_fetch(specifier: &ModuleSpecifier) -> (File, FileFetcher) { let (file_fetcher, _) = setup(CacheSetting::ReloadAll, None); let result = file_fetcher @@ -812,6 +750,9 @@ mod tests { ) } + // this test used to test how the file fetcher decoded strings, but + // now we're using it as a bit of an integration test with the functionality + // in deno_graph async fn test_fetch_remote_encoded( fixture: &str, charset: &str, @@ -820,8 +761,18 @@ mod tests { let url_str = format!("http://127.0.0.1:4545/encoding/{fixture}"); let specifier = resolve_url(&url_str).unwrap(); let (file, headers) = test_fetch_remote(&specifier).await; - assert_eq!(&*file.source, expected); - assert_eq!(file.media_type, MediaType::TypeScript); + let (media_type, maybe_charset) = + deno_graph::source::resolve_media_type_and_charset_from_headers( + &specifier, + Some(&headers), + ); + assert_eq!( + deno_graph::source::decode_source(&specifier, file.source, maybe_charset) + .unwrap() + .as_ref(), + expected + ); + assert_eq!(media_type, MediaType::TypeScript); assert_eq!( headers.get("content-type").unwrap(), &format!("application/typescript;charset={charset}") @@ -832,7 +783,12 @@ mod tests { let p = test_util::testdata_path().join(format!("encoding/{charset}.ts")); let specifier = ModuleSpecifier::from_file_path(p).unwrap(); let (file, _) = test_fetch(&specifier).await; - assert_eq!(&*file.source, expected); + assert_eq!( + deno_graph::source::decode_source(&specifier, file.source, None) + .unwrap() + .as_ref(), + expected + ); } #[test] @@ -857,192 +813,18 @@ mod tests { } } - #[test] - fn test_map_content_type() { - let fixtures = vec![ - // Extension only - (file_url!("/foo/bar.ts"), None, MediaType::TypeScript, None), - (file_url!("/foo/bar.tsx"), None, MediaType::Tsx, None), - (file_url!("/foo/bar.d.cts"), None, MediaType::Dcts, 
None), - (file_url!("/foo/bar.d.mts"), None, MediaType::Dmts, None), - (file_url!("/foo/bar.d.ts"), None, MediaType::Dts, None), - (file_url!("/foo/bar.js"), None, MediaType::JavaScript, None), - (file_url!("/foo/bar.jsx"), None, MediaType::Jsx, None), - (file_url!("/foo/bar.json"), None, MediaType::Json, None), - (file_url!("/foo/bar.wasm"), None, MediaType::Wasm, None), - (file_url!("/foo/bar.cjs"), None, MediaType::Cjs, None), - (file_url!("/foo/bar.mjs"), None, MediaType::Mjs, None), - (file_url!("/foo/bar.cts"), None, MediaType::Cts, None), - (file_url!("/foo/bar.mts"), None, MediaType::Mts, None), - (file_url!("/foo/bar"), None, MediaType::Unknown, None), - // Media type no extension - ( - "https://deno.land/x/mod", - Some("application/typescript".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/typescript".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("video/vnd.dlna.mpeg-tts".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("video/mp2t".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/x-typescript".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/javascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/javascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/ecmascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/ecmascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/x-javascript".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - Some("application/node".to_string()), - MediaType::JavaScript, - None, - ), - ( - "https://deno.land/x/mod", - 
Some("text/jsx".to_string()), - MediaType::Jsx, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/tsx".to_string()), - MediaType::Tsx, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/json".to_string()), - MediaType::Json, - None, - ), - ( - "https://deno.land/x/mod", - Some("text/json; charset=utf-8".to_string()), - MediaType::Json, - Some("utf-8".to_string()), - ), - // Extension with media type - ( - "https://deno.land/x/mod.ts", - Some("text/plain".to_string()), - MediaType::TypeScript, - None, - ), - ( - "https://deno.land/x/mod.ts", - Some("foo/bar".to_string()), - MediaType::Unknown, - None, - ), - ( - "https://deno.land/x/mod.tsx", - Some("application/typescript".to_string()), - MediaType::Tsx, - None, - ), - ( - "https://deno.land/x/mod.tsx", - Some("application/javascript".to_string()), - MediaType::Tsx, - None, - ), - ( - "https://deno.land/x/mod.jsx", - Some("application/javascript".to_string()), - MediaType::Jsx, - None, - ), - ( - "https://deno.land/x/mod.jsx", - Some("application/x-typescript".to_string()), - MediaType::Jsx, - None, - ), - ( - "https://deno.land/x/mod.d.ts", - Some("application/javascript".to_string()), - MediaType::Dts, - None, - ), - ( - "https://deno.land/x/mod.d.ts", - Some("text/plain".to_string()), - MediaType::Dts, - None, - ), - ( - "https://deno.land/x/mod.d.ts", - Some("application/x-typescript".to_string()), - MediaType::Dts, - None, - ), - ]; - - for (specifier, maybe_content_type, media_type, maybe_charset) in fixtures { - let specifier = ModuleSpecifier::parse(specifier).unwrap(); - assert_eq!( - map_content_type(&specifier, maybe_content_type.as_ref()), - (media_type, maybe_charset) - ); - } - } - #[tokio::test] async fn test_insert_cached() { let (file_fetcher, temp_dir) = setup(CacheSetting::Use, None); let local = temp_dir.path().join("a.ts"); let specifier = ModuleSpecifier::from_file_path(&local).unwrap(); let file = File { - maybe_types: None, - media_type: MediaType::TypeScript, - source: 
"some source code".into(), + source: Arc::from("some source code".as_bytes()), specifier: specifier.clone(), - maybe_headers: None, + maybe_headers: Some(HashMap::from([( + "content-type".to_string(), + "application/javascript".to_string(), + )])), }; file_fetcher.insert_cached(file.clone()); @@ -1069,8 +851,8 @@ mod tests { let maybe_file = file_fetcher.get_source(&specifier); assert!(maybe_file.is_some()); - let file = maybe_file.unwrap(); - assert_eq!(&*file.source, "export const redirect = 1;\n"); + let file = maybe_file.unwrap().into_text_decoded().unwrap(); + assert_eq!(file.source.as_ref(), "export const redirect = 1;\n"); assert_eq!( file.specifier, resolve_url("http://localhost:4545/subdir/redirects/redirect1.js") @@ -1087,13 +869,12 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export const a = \"a\";\n\nexport enum A {\n A,\n B,\n C,\n}\n" ); assert_eq!(file.media_type, MediaType::TypeScript); - assert_eq!(file.maybe_types, None); assert_eq!(file.specifier, specifier); } @@ -1119,13 +900,12 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export const a = \"a\";\n\nexport enum A {\n A,\n B,\n C,\n}\n" ); assert_eq!(file.media_type, MediaType::TypeScript); - assert_eq!(file.maybe_types, None); assert_eq!(file.specifier, specifier); } @@ -1142,7 +922,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export { printHello } from \"./print_hello.ts\";\n" @@ -1169,7 +949,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; 
assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export { printHello } from \"./print_hello.ts\";\n" @@ -1198,7 +978,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export { printHello } from \"./print_hello.ts\";\n" @@ -1223,7 +1003,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!( &*file.source, "export { printHello } from \"./print_hello.ts\";\n" @@ -1634,7 +1414,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!(&*file.source, r#"console.log("hello deno");"#); fs::write(fixture_path, r#"console.log("goodbye deno");"#).unwrap(); @@ -1642,7 +1422,7 @@ mod tests { .fetch(&specifier, PermissionsContainer::allow_all()) .await; assert!(result.is_ok()); - let file = result.unwrap(); + let file = result.unwrap().into_text_decoded().unwrap(); assert_eq!(&*file.source, r#"console.log("goodbye deno");"#); } @@ -1702,78 +1482,36 @@ mod tests { #[tokio::test] async fn test_fetch_local_utf_16be() { - let expected = String::from_utf8( - b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), - ) - .unwrap(); + let expected = + String::from_utf8(b"console.log(\"Hello World\");\x0A".to_vec()).unwrap(); test_fetch_local_encoded("utf-16be", expected).await; } #[tokio::test] async fn test_fetch_local_utf_16le() { - let expected = String::from_utf8( - b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), - ) - .unwrap(); + let expected = + String::from_utf8(b"console.log(\"Hello 
World\");\x0A".to_vec()).unwrap(); test_fetch_local_encoded("utf-16le", expected).await; } #[tokio::test] async fn test_fetch_local_utf8_with_bom() { - let expected = String::from_utf8( - b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), - ) - .unwrap(); + let expected = + String::from_utf8(b"console.log(\"Hello World\");\x0A".to_vec()).unwrap(); test_fetch_local_encoded("utf-8", expected).await; } #[tokio::test] - async fn test_fetch_remote_javascript_with_types() { - let specifier = - ModuleSpecifier::parse("http://127.0.0.1:4545/xTypeScriptTypes.js") - .unwrap(); - let (file, _) = test_fetch_remote(&specifier).await; - assert_eq!( - file.maybe_types, - Some("./xTypeScriptTypes.d.ts".to_string()) - ); - } - - #[tokio::test] - async fn test_fetch_remote_jsx_with_types() { - let specifier = - ModuleSpecifier::parse("http://127.0.0.1:4545/xTypeScriptTypes.jsx") - .unwrap(); - let (file, _) = test_fetch_remote(&specifier).await; - assert_eq!(file.media_type, MediaType::Jsx,); - assert_eq!( - file.maybe_types, - Some("./xTypeScriptTypes.d.ts".to_string()) - ); - } - - #[tokio::test] - async fn test_fetch_remote_typescript_with_types() { - let specifier = - ModuleSpecifier::parse("http://127.0.0.1:4545/xTypeScriptTypes.ts") - .unwrap(); - let (file, _) = test_fetch_remote(&specifier).await; - assert_eq!(file.maybe_types, None); - } - - #[tokio::test] async fn test_fetch_remote_utf16_le() { let expected = - std::str::from_utf8(b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A") - .unwrap(); + std::str::from_utf8(b"console.log(\"Hello World\");\x0A").unwrap(); test_fetch_remote_encoded("utf-16le.ts", "utf-16le", expected).await; } #[tokio::test] async fn test_fetch_remote_utf16_be() { let expected = - std::str::from_utf8(b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A") - .unwrap(); + std::str::from_utf8(b"console.log(\"Hello World\");\x0A").unwrap(); test_fetch_remote_encoded("utf-16be.ts", "utf-16be", expected).await; } |