summary refs log tree commit diff
path: root/cli
diff options
context:
space:
mode:
Diffstat (limited to 'cli')
-rw-r--r-- cli/Cargo.toml 1
-rw-r--r-- cli/file_fetcher.rs 346
-rw-r--r-- cli/fmt.rs 7
-rw-r--r-- cli/global_state.rs 2
-rw-r--r-- cli/main.rs 15
-rw-r--r-- cli/module_graph.rs 10
-rw-r--r-- cli/tests/encoding/utf-16be.ts bin 0 -> 58 bytes
-rw-r--r-- cli/tests/encoding/utf-16le.ts bin 0 -> 58 bytes
-rw-r--r-- cli/tests/encoding/utf-8.ts 1
-rw-r--r-- cli/tests/encoding/windows-1255 1
-rw-r--r-- cli/text_encoding.rs 94
-rw-r--r-- cli/tsc.rs 26
12 files changed, 404 insertions, 99 deletions
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
index 0a9125355..8a8958b63 100644
--- a/cli/Cargo.toml
+++ b/cli/Cargo.toml
@@ -32,6 +32,7 @@ byteorder = "1.3.4"
clap = "2.33.1"
dissimilar = "1.0.2"
dlopen = "0.1.8"
+encoding_rs = "0.8.23"
dprint-plugin-typescript = "0.25.0"
futures = "0.3.5"
http = "0.2.1"
diff --git a/cli/file_fetcher.rs b/cli/file_fetcher.rs
index 7d72d5cd4..63743f5ab 100644
--- a/cli/file_fetcher.rs
+++ b/cli/file_fetcher.rs
@@ -7,10 +7,12 @@ use crate::http_util::FetchOnceResult;
use crate::msg;
use crate::op_error::OpError;
use crate::permissions::Permissions;
+use crate::text_encoding;
use deno_core::ErrBox;
use deno_core::ModuleSpecifier;
use futures::future::FutureExt;
use log::info;
+use std::borrow::Cow;
use std::collections::HashMap;
use std::fs;
use std::future::Future;
@@ -24,6 +26,47 @@ use std::sync::Arc;
use std::sync::Mutex;
use url::Url;
+/// A text document: raw, still-encoded bytes paired with the charset label used to decode them.
+#[derive(Debug, Clone)]
+pub struct TextDocument {
+ bytes: Vec<u8>, // contents exactly as read; never decoded in place
+ charset: Cow<'static, str>, // label handed to text_encoding::convert_to_utf8 by to_str()
+}
+
+impl TextDocument {
+ pub fn new( // pairs raw bytes with the charset label that will later be used to decode them
+ bytes: Vec<u8>,
+ charset: Option<impl Into<Cow<'static, str>>>, // explicit label (not validated here); None means detect it from `bytes`
+ ) -> TextDocument {
+ let charset = charset
+ .map(|cs| cs.into())
+ .unwrap_or_else(|| text_encoding::detect_charset(&bytes).into()); // detection only runs when no label was supplied
+ TextDocument { bytes, charset }
+ }
+ /// Borrows the raw bytes exactly as stored; no decoding is performed.
+ pub fn as_bytes(&self) -> &Vec<u8> {
+ &self.bytes
+ }
+ /// Consumes the document and returns the raw bytes; the charset label is dropped.
+ pub fn into_bytes(self) -> Vec<u8> {
+ self.bytes
+ }
+ /// Decodes the bytes to UTF-8 text with `text_encoding::convert_to_utf8` and the stored charset.
+ pub fn to_str(&self) -> Result<Cow<str>, std::io::Error> {
+ text_encoding::convert_to_utf8(&self.bytes, &self.charset)
+ }
+ /// Same decoding as `to_str`, but the result is always an owned `String`.
+ pub fn to_string(&self) -> Result<String, std::io::Error> {
+ self.to_str().map(String::from)
+ }
+}
+
+impl From<Vec<u8>> for TextDocument { // lets callers write `bytes.into()` when they have no charset label
+ fn from(bytes: Vec<u8>) -> Self {
+ TextDocument::new(bytes, Option::<&str>::None) // None => new() detects the charset; the turbofish only pins the generic parameter
+ }
+}
+
/// Structure representing local or remote file.
///
/// In case of remote file `url` might be different than originally requested URL, if so
@@ -34,7 +77,7 @@ pub struct SourceFile {
pub filename: PathBuf,
pub types_header: Option<String>,
pub media_type: msg::MediaType,
- pub source_code: Vec<u8>,
+ pub source_code: TextDocument,
}
/// Simple struct implementing in-process caching to prevent multiple
@@ -180,8 +223,9 @@ impl SourceFileFetcher {
match result {
Ok(mut file) => {
// TODO: move somewhere?
- if file.source_code.starts_with(b"#!") {
- file.source_code = filter_shebang(file.source_code);
+ if file.source_code.bytes.starts_with(b"#!") {
+ file.source_code =
+ filter_shebang(&file.source_code.to_str().unwrap()[..]).into();
}
// Cache in-process for subsequent access.
@@ -313,12 +357,12 @@ impl SourceFileFetcher {
Err(e) => return Err(e.into()),
};
- let media_type = map_content_type(&filepath, None);
+ let (media_type, charset) = map_content_type(&filepath, None);
Ok(SourceFile {
url: module_url.clone(),
filename: filepath,
media_type,
- source_code,
+ source_code: TextDocument::new(source_code, charset),
types_header: None,
})
}
@@ -380,7 +424,7 @@ impl SourceFileFetcher {
let cache_filename = self.http_cache.get_cache_filename(module_url);
let fake_filepath = PathBuf::from(module_url.path());
- let media_type = map_content_type(
+ let (media_type, charset) = map_content_type(
&fake_filepath,
headers.get("content-type").map(|e| e.as_str()),
);
@@ -389,7 +433,7 @@ impl SourceFileFetcher {
url: module_url.clone(),
filename: cache_filename,
media_type,
- source_code,
+ source_code: TextDocument::new(source_code, charset),
types_header,
}))
}
@@ -490,7 +534,7 @@ impl SourceFileFetcher {
let cache_filepath = dir.http_cache.get_cache_filename(&module_url);
// Used to sniff out content type from file extension - probably to be removed
let fake_filepath = PathBuf::from(module_url.path());
- let media_type = map_content_type(
+ let (media_type, charset) = map_content_type(
&fake_filepath,
headers.get("content-type").map(String::as_str),
);
@@ -502,7 +546,7 @@ impl SourceFileFetcher {
url: module_url.clone(),
filename: cache_filepath,
media_type,
- source_code: source,
+ source_code: TextDocument::new(source, charset),
types_header,
};
@@ -532,16 +576,19 @@ pub fn map_file_extension(path: &Path) -> msg::MediaType {
}
}
-// convert a ContentType string into a enumerated MediaType
-fn map_content_type(path: &Path, content_type: Option<&str>) -> msg::MediaType {
+// Convert a Content-Type string into an enumerated MediaType plus an optional charset.
+fn map_content_type(
+ path: &Path,
+ content_type: Option<&str>,
+) -> (msg::MediaType, Option<String>) {
match content_type {
Some(content_type) => {
- // sometimes there is additional data after the media type in
+ // Sometimes there is additional data after the media type in
// Content-Type so we have to do a bit of manipulation so we are only
- // dealing with the actual media type
- let ct_vector: Vec<&str> = content_type.split(';').collect();
- let ct: &str = ct_vector.first().unwrap();
- match ct.to_lowercase().as_ref() {
+ // dealing with the actual media type.
+ let mut ct_iter = content_type.split(';');
+ let ct = ct_iter.next().unwrap();
+ let media_type = match ct.to_lowercase().as_ref() {
"application/typescript"
| "text/typescript"
| "video/vnd.dlna.mpeg-tts"
@@ -565,9 +612,16 @@ fn map_content_type(path: &Path, content_type: Option<&str>) -> msg::MediaType {
debug!("unknown content type: {}", content_type);
msg::MediaType::Unknown
}
- }
+ };
+
+ let charset = ct_iter
+ .map(str::trim)
+ .find_map(|s| s.strip_prefix("charset="))
+ .map(String::from);
+
+ (media_type, charset)
}
- None => map_file_extension(path),
+ None => (map_file_extension(path), None),
}
}
@@ -586,8 +640,7 @@ fn map_js_like_extension(
}
}
-fn filter_shebang(bytes: Vec<u8>) -> Vec<u8> {
- let string = str::from_utf8(&bytes).unwrap();
+fn filter_shebang(string: &str) -> Vec<u8> {
if let Some(i) = string.find('\n') {
let (_, rest) = string.split_at(i);
rest.as_bytes().to_owned()
@@ -767,7 +820,7 @@ mod tests {
assert!(result.is_ok());
let r = result.unwrap();
assert_eq!(
- r.source_code,
+ r.source_code.bytes,
&b"export { printHello } from \"./print_hello.ts\";\n"[..]
);
assert_eq!(&(r.media_type), &msg::MediaType::TypeScript);
@@ -794,7 +847,7 @@ mod tests {
assert!(result2.is_ok());
let r2 = result2.unwrap();
assert_eq!(
- r2.source_code,
+ r2.source_code.bytes,
&b"export { printHello } from \"./print_hello.ts\";\n"[..]
);
// If get_source_file does not call remote, this should be JavaScript
@@ -823,7 +876,7 @@ mod tests {
assert!(result3.is_ok());
let r3 = result3.unwrap();
assert_eq!(
- r3.source_code,
+ r3.source_code.bytes,
&b"export { printHello } from \"./print_hello.ts\";\n"[..]
);
// If get_source_file does not call remote, this should be JavaScript
@@ -850,7 +903,7 @@ mod tests {
assert!(result4.is_ok());
let r4 = result4.unwrap();
let expected4 = &b"export { printHello } from \"./print_hello.ts\";\n"[..];
- assert_eq!(r4.source_code, expected4);
+ assert_eq!(r4.source_code.bytes, expected4);
// Resolved back to TypeScript
assert_eq!(&(r4.media_type), &msg::MediaType::TypeScript);
@@ -880,7 +933,7 @@ mod tests {
assert!(result.is_ok());
let r = result.unwrap();
let expected = b"export const loaded = true;\n";
- assert_eq!(r.source_code, expected);
+ assert_eq!(r.source_code.bytes, expected);
assert_eq!(&(r.media_type), &msg::MediaType::JavaScript);
let (_, headers) = fetcher.http_cache.get(&module_url).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/javascript");
@@ -906,7 +959,7 @@ mod tests {
assert!(result2.is_ok());
let r2 = result2.unwrap();
let expected2 = b"export const loaded = true;\n";
- assert_eq!(r2.source_code, expected2);
+ assert_eq!(r2.source_code.bytes, expected2);
// If get_source_file does not call remote, this should be TypeScript
// as we modified before! (we do not overwrite .headers.json due to no http
// fetch)
@@ -932,7 +985,7 @@ mod tests {
assert!(result3.is_ok());
let r3 = result3.unwrap();
let expected3 = b"export const loaded = true;\n";
- assert_eq!(r3.source_code, expected3);
+ assert_eq!(r3.source_code.bytes, expected3);
// Now the old .headers.json file should be overwritten back to JavaScript!
// (due to http fetch)
assert_eq!(&(r3.media_type), &msg::MediaType::JavaScript);
@@ -1352,7 +1405,7 @@ mod tests {
.await;
assert!(result.is_ok());
let r = result.unwrap();
- assert_eq!(r.source_code, b"export const loaded = true;\n");
+ assert_eq!(r.source_code.bytes, b"export const loaded = true;\n");
assert_eq!(&(r.media_type), &msg::MediaType::TypeScript);
// Modify .metadata.json, make sure read from local
@@ -1368,7 +1421,7 @@ mod tests {
let result2 = fetcher.fetch_cached_remote_source(&module_url, 1);
assert!(result2.is_ok());
let r2 = result2.unwrap().unwrap();
- assert_eq!(r2.source_code, b"export const loaded = true;\n");
+ assert_eq!(r2.source_code.bytes, b"export const loaded = true;\n");
// Not MediaType::TypeScript due to .headers.json modification
assert_eq!(&(r2.media_type), &msg::MediaType::JavaScript);
@@ -1392,7 +1445,7 @@ mod tests {
.await;
assert!(result.is_ok());
let r = result.unwrap();
- assert_eq!(r.source_code, b"export const loaded = true;\n");
+ assert_eq!(r.source_code.bytes, b"export const loaded = true;\n");
assert_eq!(&(r.media_type), &msg::MediaType::TypeScript);
let (_, headers) = fetcher.http_cache.get(module_url).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/typescript");
@@ -1417,7 +1470,7 @@ mod tests {
.await;
assert!(result.is_ok());
let r2 = result.unwrap();
- assert_eq!(r2.source_code, b"export const loaded = true;\n");
+ assert_eq!(r2.source_code.bytes, b"export const loaded = true;\n");
assert_eq!(&(r2.media_type), &msg::MediaType::JavaScript);
let (_, headers) = fetcher.http_cache.get(module_url).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/javascript");
@@ -1442,7 +1495,7 @@ mod tests {
.await;
assert!(result.is_ok());
let r3 = result.unwrap();
- assert_eq!(r3.source_code, b"export const loaded = true;\n");
+ assert_eq!(r3.source_code.bytes, b"export const loaded = true;\n");
assert_eq!(&(r3.media_type), &msg::MediaType::TypeScript);
let (_, headers) = fetcher.http_cache.get(module_url).unwrap();
assert_eq!(headers.get("content-type").unwrap(), "text/typescript");
@@ -1523,6 +1576,63 @@ mod tests {
}
}
+ async fn test_fetch_source_file_from_disk_nonstandard_encoding( // shared driver: fetch tests/encoding/<charset>.ts from disk and compare its decoded text
+ charset: &str, // only used as the fixture file stem, e.g. "utf-16be"
+ expected_content: String, // UTF-8 text the fetched document must decode to
+ ) {
+ let (_temp_dir, fetcher) = test_setup();
+
+ let p = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) // fixture path is resolved relative to the crate root
+ .join(format!("tests/encoding/{}.ts", charset));
+ let specifier =
+ ModuleSpecifier::resolve_url_or_path(p.to_str().unwrap()).unwrap();
+ let r = fetcher
+ .fetch_source_file(&specifier, None, Permissions::allow_all())
+ .await;
+ assert!(r.is_ok());
+ let fetched_file = r.unwrap();
+ let source_code = fetched_file.source_code.to_str(); // decode with whatever charset the fetcher attached to the document
+ assert!(source_code.is_ok());
+ let actual = source_code.unwrap();
+ assert_eq!(expected_content, actual);
+ }
+
+ #[tokio::test]
+ async fn test_fetch_source_file_from_disk_utf_16_be() { // UTF-16 big-endian fixture read from disk
+ test_fetch_source_file_from_disk_nonstandard_encoding(
+ "utf-16be", // selects tests/encoding/utf-16be.ts
+ String::from_utf8(
+ b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), // expected text still starts with the BOM (EF BB BF once in UTF-8)
+ )
+ .unwrap(),
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn test_fetch_source_file_from_disk_utf_16_le() { // UTF-16 little-endian fixture read from disk
+ test_fetch_source_file_from_disk_nonstandard_encoding(
+ "utf-16le", // selects tests/encoding/utf-16le.ts
+ String::from_utf8(
+ b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), // expected text still starts with the BOM (EF BB BF once in UTF-8)
+ )
+ .unwrap(),
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn test_fetch_source_file_from_disk_utf_8_with_bom() { // UTF-8 fixture read from disk; the BOM must survive the round trip
+ test_fetch_source_file_from_disk_nonstandard_encoding(
+ "utf-8", // selects tests/encoding/utf-8.ts
+ String::from_utf8(
+ b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A".to_vec(), // leading EF BB BF is the UTF-8 byte-order mark
+ )
+ .unwrap(),
+ )
+ .await;
+ }
+
#[test]
fn test_map_file_extension() {
assert_eq!(
@@ -1571,43 +1681,43 @@ mod tests {
fn test_map_content_type_extension_only() {
// Extension only
assert_eq!(
- map_content_type(Path::new("foo/bar.ts"), None),
+ map_content_type(Path::new("foo/bar.ts"), None).0,
msg::MediaType::TypeScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar.tsx"), None),
+ map_content_type(Path::new("foo/bar.tsx"), None).0,
msg::MediaType::TSX
);
assert_eq!(
- map_content_type(Path::new("foo/bar.d.ts"), None),
+ map_content_type(Path::new("foo/bar.d.ts"), None).0,
msg::MediaType::TypeScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar.js"), None),
+ map_content_type(Path::new("foo/bar.js"), None).0,
msg::MediaType::JavaScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar.txt"), None),
+ map_content_type(Path::new("foo/bar.txt"), None).0,
msg::MediaType::Unknown
);
assert_eq!(
- map_content_type(Path::new("foo/bar.jsx"), None),
+ map_content_type(Path::new("foo/bar.jsx"), None).0,
msg::MediaType::JSX
);
assert_eq!(
- map_content_type(Path::new("foo/bar.json"), None),
+ map_content_type(Path::new("foo/bar.json"), None).0,
msg::MediaType::Json
);
assert_eq!(
- map_content_type(Path::new("foo/bar.wasm"), None),
+ map_content_type(Path::new("foo/bar.wasm"), None).0,
msg::MediaType::Wasm
);
assert_eq!(
- map_content_type(Path::new("foo/bar.cjs"), None),
+ map_content_type(Path::new("foo/bar.cjs"), None).0,
msg::MediaType::JavaScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), None),
+ map_content_type(Path::new("foo/bar"), None).0,
msg::MediaType::Unknown
);
}
@@ -1616,140 +1726,154 @@ mod tests {
fn test_map_content_type_media_type_with_no_extension() {
// Media Type
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("application/typescript")),
+ map_content_type(Path::new("foo/bar"), Some("application/typescript")).0,
msg::MediaType::TypeScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("text/typescript")),
+ map_content_type(Path::new("foo/bar"), Some("text/typescript")).0,
msg::MediaType::TypeScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("video/vnd.dlna.mpeg-tts")),
+ map_content_type(Path::new("foo/bar"), Some("video/vnd.dlna.mpeg-tts")).0,
msg::MediaType::TypeScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("video/mp2t")),
+ map_content_type(Path::new("foo/bar"), Some("video/mp2t")).0,
msg::MediaType::TypeScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("application/x-typescript")),
+ map_content_type(Path::new("foo/bar"), Some("application/x-typescript"))
+ .0,
msg::MediaType::TypeScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("application/javascript")),
+ map_content_type(Path::new("foo/bar"), Some("application/javascript")).0,
msg::MediaType::JavaScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("text/javascript")),
+ map_content_type(Path::new("foo/bar"), Some("text/javascript")).0,
msg::MediaType::JavaScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("application/ecmascript")),
+ map_content_type(Path::new("foo/bar"), Some("application/ecmascript")).0,
msg::MediaType::JavaScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("text/ecmascript")),
+ map_content_type(Path::new("foo/bar"), Some("text/ecmascript")).0,
msg::MediaType::JavaScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("application/x-javascript")),
+ map_content_type(Path::new("foo/bar"), Some("application/x-javascript"))
+ .0,
msg::MediaType::JavaScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("application/json")),
+ map_content_type(Path::new("foo/bar"), Some("application/json")).0,
msg::MediaType::Json
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("application/node")),
+ map_content_type(Path::new("foo/bar"), Some("application/node")).0,
msg::MediaType::JavaScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar"), Some("text/json")),
+ map_content_type(Path::new("foo/bar"), Some("text/json")).0,
msg::MediaType::Json
);
+ assert_eq!(
+ map_content_type(Path::new("foo/bar"), Some("text/json; charset=utf-8 ")),
+ (msg::MediaType::Json, Some("utf-8".to_owned()))
+ );
}
#[test]
fn test_map_file_extension_media_type_with_extension() {
assert_eq!(
- map_content_type(Path::new("foo/bar.ts"), Some("text/plain")),
+ map_content_type(Path::new("foo/bar.ts"), Some("text/plain")).0,
msg::MediaType::TypeScript
);
assert_eq!(
- map_content_type(Path::new("foo/bar.ts"), Some("foo/bar")),
+ map_content_type(Path::new("foo/bar.ts"), Some("foo/bar")).0,
msg::MediaType::Unknown
);
assert_eq!(
map_content_type(
Path::new("foo/bar.tsx"),
Some("application/typescript"),
- ),
+ )
+ .0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.tsx"),
Some("application/javascript"),
- ),
+ )
+ .0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.tsx"),
Some("application/x-typescript"),
- ),
+ )
+ .0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.tsx"),
Some("video/vnd.dlna.mpeg-tts"),
- ),
+ )
+ .0,
msg::MediaType::TSX
);
assert_eq!(
- map_content_type(Path::new("foo/bar.tsx"), Some("video/mp2t")),
+ map_content_type(Path::new("foo/bar.tsx"), Some("video/mp2t")).0,
msg::MediaType::TSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.jsx"),
Some("application/javascript"),
- ),
+ )
+ .0,
msg::MediaType::JSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.jsx"),
Some("application/x-typescript"),
- ),
+ )
+ .0,
msg::MediaType::JSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.jsx"),
Some("application/ecmascript"),
- ),
+ )
+ .0,
msg::MediaType::JSX
);
assert_eq!(
- map_content_type(Path::new("foo/bar.jsx"), Some("text/ecmascript")),
+ map_content_type(Path::new("foo/bar.jsx"), Some("text/ecmascript")).0,
msg::MediaType::JSX
);
assert_eq!(
map_content_type(
Path::new("foo/bar.jsx"),
Some("application/x-javascript"),
- ),
+ )
+ .0,
msg::MediaType::JSX
);
}
#[test]
fn test_filter_shebang() {
- assert_eq!(filter_shebang(b"#!"[..].to_owned()), b"");
- assert_eq!(filter_shebang(b"#!\n\n"[..].to_owned()), b"\n\n");
- let code = b"#!/usr/bin/env deno\nconsole.log('hello');\n"[..].to_owned();
+ assert_eq!(filter_shebang("#!"), b"");
+ assert_eq!(filter_shebang("#!\n\n"), b"\n\n");
+ let code = "#!/usr/bin/env deno\nconsole.log('hello');\n";
assert_eq!(filter_shebang(code), b"\nconsole.log('hello');\n");
}
@@ -1771,7 +1895,7 @@ mod tests {
.await;
assert!(source.is_ok());
let source = source.unwrap();
- assert_eq!(source.source_code, b"console.log('etag')");
+ assert_eq!(source.source_code.bytes, b"console.log('etag')");
assert_eq!(&(source.media_type), &msg::MediaType::TypeScript);
let (_, headers) = fetcher.http_cache.get(&module_url).unwrap();
@@ -1798,7 +1922,7 @@ mod tests {
)
.await
.unwrap();
- assert_eq!(cached_source.source_code, b"changed content");
+ assert_eq!(cached_source.source_code.bytes, b"changed content");
let modified2 = metadata_path.metadata().unwrap().modified().unwrap();
@@ -1825,7 +1949,7 @@ mod tests {
.await;
assert!(source.is_ok());
let source = source.unwrap();
- assert_eq!(source.source_code, b"export const foo = 'foo';");
+ assert_eq!(source.source_code.bytes, b"export const foo = 'foo';");
assert_eq!(&(source.media_type), &msg::MediaType::JavaScript);
assert_eq!(
source.types_header,
@@ -1833,4 +1957,80 @@ mod tests {
);
drop(http_server_guard);
}
+
+ #[tokio::test]
+ async fn test_fetch_source_file_from_net_utf16_le() { // UTF-16 little-endian fixture fetched over HTTP
+ let content =
+ std::str::from_utf8(b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A") // expected decoded text, BOM included
+ .unwrap();
+ test_fetch_non_utf8_source_file_from_net(
+ "utf-16le", // charset the fetched document must report
+ "utf-16le.ts", // fixture file name under cli/tests/encoding/
+ content,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn test_fetch_source_file_from_net_utf16_be() { // UTF-16 big-endian fixture fetched over HTTP
+ let content =
+ std::str::from_utf8(b"\xEF\xBB\xBFconsole.log(\"Hello World\");\x0A") // expected decoded text, BOM included
+ .unwrap();
+ test_fetch_non_utf8_source_file_from_net(
+ "utf-16be", // charset the fetched document must report
+ "utf-16be.ts", // fixture file name under cli/tests/encoding/
+ content,
+ )
+ .await;
+ }
+
+ #[tokio::test]
+ async fn test_fetch_source_file_from_net_windows_1255() { // windows-1255 fixture fetched over HTTP; expected text is written with \u escapes
+ let content = "console.log(\"\u{5E9}\u{5DC}\u{5D5}\u{5DD} \
+ \u{5E2}\u{5D5}\u{5DC}\u{5DD}\");\u{A}";
+ test_fetch_non_utf8_source_file_from_net(
+ "windows-1255", // charset the fetched document must report
+ "windows-1255", // fixture file name; note it has no extension
+ content,
+ )
+ .await;
+ }
+
+ async fn test_fetch_non_utf8_source_file_from_net( // shared driver: fetch an encoding fixture over HTTP and check charset, text, media type and cached header
+ charset: &str, // expected charset label, lowercase
+ file_name: &str, // fixture name appended to the /cli/tests/encoding/ URL
+ expected_content: &str, // UTF-8 text the document must decode to
+ ) {
+ let http_server_guard = test_util::http_server(); // guard for the test HTTP server (assumed to be the one at 127.0.0.1:4545 used below)
+ let (_temp_dir, fetcher) = test_setup();
+ let module_url = Url::parse(&format!(
+ "http://127.0.0.1:4545/cli/tests/encoding/{}",
+ file_name
+ ))
+ .unwrap();
+
+ let source = fetcher
+ .fetch_remote_source( // NOTE(review): meaning of the two `false` flags and `1` is defined by fetch_remote_source, not visible here
+ &module_url,
+ false,
+ false,
+ 1,
+ &Permissions::allow_all(),
+ )
+ .await;
+ assert!(source.is_ok());
+ let source = source.unwrap();
+ assert_eq!(&source.source_code.charset.to_lowercase()[..], charset); // stored label is lowercased before comparing
+ let text = &source.source_code.to_str().unwrap();
+ assert_eq!(text, expected_content);
+ assert_eq!(&(source.media_type), &msg::MediaType::TypeScript);
+
+ let (_, headers) = fetcher.http_cache.get(&module_url).unwrap(); // the cached response headers must carry the charset too
+ assert_eq!(
+ headers.get("content-type").unwrap(),
+ &format!("application/typescript;charset={}", charset)
+ );
+
+ drop(http_server_guard);
+ }
}
diff --git a/cli/fmt.rs b/cli/fmt.rs
index 70bc0e8bc..319f7fece 100644
--- a/cli/fmt.rs
+++ b/cli/fmt.rs
@@ -11,6 +11,7 @@ use crate::colors;
use crate::diff::diff;
use crate::fs::files_in_subtree;
use crate::op_error::OpError;
+use crate::text_encoding;
use deno_core::ErrBox;
use dprint_plugin_typescript as dprint;
use std::fs;
@@ -247,13 +248,15 @@ struct FileContents {
}
fn read_file_contents(file_path: &PathBuf) -> Result<FileContents, ErrBox> {
- let file_text = fs::read_to_string(&file_path)?;
+ let file_bytes = fs::read(&file_path)?;
+ let charset = text_encoding::detect_charset(&file_bytes);
+ let file_text = text_encoding::convert_to_utf8(&file_bytes, charset)?;
let had_bom = file_text.starts_with(BOM_CHAR);
let text = if had_bom {
// remove the BOM
String::from(&file_text[BOM_CHAR.len_utf8()..])
} else {
- file_text
+ String::from(file_text)
};
Ok(FileContents { text, had_bom })
diff --git a/cli/global_state.rs b/cli/global_state.rs
index a26fc453e..a723bdd2f 100644
--- a/cli/global_state.rs
+++ b/cli/global_state.rs
@@ -250,7 +250,7 @@ impl GlobalState {
}
} else {
CompiledModule {
- code: String::from_utf8(out.source_code.clone())?,
+ code: out.source_code.to_string()?,
name: out.url.to_string(),
}
};
diff --git a/cli/main.rs b/cli/main.rs
index cff401fba..191355a0c 100644
--- a/cli/main.rs
+++ b/cli/main.rs
@@ -11,6 +11,7 @@ extern crate futures;
extern crate serde_json;
extern crate clap;
extern crate deno_core;
+extern crate encoding_rs;
extern crate indexmap;
#[cfg(unix)]
extern crate nix;
@@ -60,6 +61,7 @@ mod startup_data;
pub mod state;
mod swc_util;
mod test_runner;
+mod text_encoding;
mod tokio_util;
mod tsc;
mod upgrade;
@@ -70,6 +72,7 @@ pub mod worker;
use crate::doc::parser::DocFileLoader;
use crate::file_fetcher::SourceFile;
use crate::file_fetcher::SourceFileFetcher;
+use crate::file_fetcher::TextDocument;
use crate::fs as deno_fs;
use crate::global_state::GlobalState;
use crate::msg::MediaType;
@@ -412,7 +415,7 @@ async fn eval_command(
} else {
MediaType::JavaScript
},
- source_code,
+ source_code: TextDocument::new(source_code, Some("utf-8")),
};
// Save our fake file into file fetcher cache
// to allow module access by TS compiler (e.g. op_fetch_source_files)
@@ -525,8 +528,7 @@ async fn doc_command(
let source_file = fetcher
.fetch_source_file(&specifier, None, Permissions::allow_all())
.await?;
- String::from_utf8(source_file.source_code)
- .map_err(|_| OpError::other("failed to parse".to_string()))
+ source_file.source_code.to_string().map_err(OpError::from)
}
.boxed_local()
}
@@ -601,7 +603,7 @@ async fn run_command(flags: Flags, script: String) -> Result<(), ErrBox> {
url: main_module_url,
types_header: None,
media_type: MediaType::TypeScript,
- source_code: source,
+ source_code: source.into(),
};
// Save our fake file into file fetcher cache
// to allow module access by TS compiler (e.g. op_fetch_source_files)
@@ -657,7 +659,10 @@ async fn test_command(
url: test_file_url,
types_header: None,
media_type: MediaType::TypeScript,
- source_code: test_file.clone().into_bytes(),
+ source_code: TextDocument::new(
+ test_file.clone().into_bytes(),
+ Some("utf-8"),
+ ),
};
// Save our fake file into file fetcher cache
// to allow module access by TS compiler (e.g. op_fetch_source_files)
diff --git a/cli/module_graph.rs b/cli/module_graph.rs
index 3fb1379f3..8b7a52906 100644
--- a/cli/module_graph.rs
+++ b/cli/module_graph.rs
@@ -458,7 +458,7 @@ impl ModuleGraphLoader {
redirect: Some(source_file.url.to_string()),
filename: source_file.filename.to_str().unwrap().to_string(),
version_hash: checksum::gen(&[
- &source_file.source_code,
+ &source_file.source_code.as_bytes(),
version::DENO.as_bytes(),
]),
media_type: source_file.media_type,
@@ -473,9 +473,11 @@ impl ModuleGraphLoader {
}
let module_specifier = ModuleSpecifier::from(source_file.url.clone());
- let version_hash =
- checksum::gen(&[&source_file.source_code, version::DENO.as_bytes()]);
- let source_code = String::from_utf8(source_file.source_code)?;
+ let version_hash = checksum::gen(&[
+ &source_file.source_code.as_bytes(),
+ version::DENO.as_bytes(),
+ ]);
+ let source_code = source_file.source_code.to_string()?;
if SUPPORTED_MEDIA_TYPES.contains(&source_file.media_type) {
if let Some(types_specifier) = source_file.types_header {
diff --git a/cli/tests/encoding/utf-16be.ts b/cli/tests/encoding/utf-16be.ts
new file mode 100644
index 000000000..3d0144d7c
--- /dev/null
+++ b/cli/tests/encoding/utf-16be.ts
Binary files differ
diff --git a/cli/tests/encoding/utf-16le.ts b/cli/tests/encoding/utf-16le.ts
new file mode 100644
index 000000000..6f0e415f2
--- /dev/null
+++ b/cli/tests/encoding/utf-16le.ts
Binary files differ
diff --git a/cli/tests/encoding/utf-8.ts b/cli/tests/encoding/utf-8.ts
new file mode 100644
index 000000000..bf889aeb7
--- /dev/null
+++ b/cli/tests/encoding/utf-8.ts
@@ -0,0 +1 @@
+console.log("Hello World");
diff --git a/cli/tests/encoding/windows-1255 b/cli/tests/encoding/windows-1255
new file mode 100644
index 000000000..ec5cad7fd
--- /dev/null
+++ b/cli/tests/encoding/windows-1255
@@ -0,0 +1 @@
+console.log("ùìåí òåìí");
diff --git a/cli/text_encoding.rs b/cli/text_encoding.rs
new file mode 100644
index 000000000..9d8e5bcc9
--- /dev/null
+++ b/cli/text_encoding.rs
@@ -0,0 +1,94 @@
+// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
+use encoding_rs::*;
+use std::{
+ borrow::Cow,
+ io::{Error, ErrorKind},
+};
+
+/// Guesses the charset label for `bytes` by looking only for a leading UTF-16 byte-order mark.
+///
+/// Returns "utf-16le" or "utf-16be" when that BOM is present and "utf-8" otherwise; the content itself is never inspected.
+pub fn detect_charset(bytes: &'_ [u8]) -> &'static str {
+ const UTF16_LE_BOM: &[u8] = b"\xFF\xFE"; // U+FEFF encoded little-endian
+ const UTF16_BE_BOM: &[u8] = b"\xFE\xFF"; // U+FEFF encoded big-endian
+
+ if bytes.starts_with(UTF16_LE_BOM) {
+ "utf-16le"
+ } else if bytes.starts_with(UTF16_BE_BOM) {
+ "utf-16be"
+ } else {
+ // No UTF-16 BOM (this includes empty input): assume UTF-8.
+ "utf-8"
+ }
+}
+
+/// Decodes `bytes` as `charset` and returns the text as UTF-8.
+///
+/// `charset` is resolved with `Encoding::for_label`, so only the encodings of the
+/// WHATWG Encoding Standard (https://encoding.spec.whatwg.org/) that encoding_rs implements are accepted.
+/// Errors: `InvalidInput` for an unknown label, `InvalidData` when the bytes cannot be decoded.
+pub fn convert_to_utf8<'a>(
+ bytes: &'a [u8],
+ charset: &'_ str,
+) -> Result<Cow<'a, str>, Error> {
+ match Encoding::for_label(charset.as_bytes()) {
+ Some(encoding) => encoding
+ .decode_without_bom_handling_and_without_replacement(bytes) // per its name: a BOM is not stripped, and bad input gives None rather than replacement characters
+ .ok_or_else(|| ErrorKind::InvalidData.into()),
+ None => Err(Error::new(
+ ErrorKind::InvalidInput,
+ format!("Unsupported charset: {}", charset),
+ )),
+ }
+}
+
+#[cfg(test)]
+mod tests { // unit tests for detect_charset and convert_to_utf8
+ use super::*;
+
+ fn test_detection(test_data: &[u8], expected_charset: &str) { // helper: detected label must equal the expected one, ignoring case
+ let detected_charset = detect_charset(test_data);
+ assert_eq!(
+ expected_charset.to_lowercase(),
+ detected_charset.to_lowercase()
+ );
+ }
+
+ #[test]
+ fn test_detection_utf8_no_bom() { // no BOM at all, with one non-ASCII character (U+23F0) in the text
+ let test_data = "Hello UTF-8 it is \u{23F0} for Deno!"
+ .to_owned()
+ .into_bytes();
+ test_detection(&test_data, "utf-8");
+ }
+
+ #[test]
+ fn test_detection_utf16_little_endian() {
+ let test_data = b"\xFF\xFEHello UTF-16LE".to_owned().to_vec(); // only the BOM matters: the payload after it is plain single-byte ASCII
+ test_detection(&test_data, "utf-16le");
+ }
+
+ #[test]
+ fn test_detection_utf16_big_endian() {
+ let test_data = b"\xFE\xFFHello UTF-16BE".to_owned().to_vec(); // only the BOM matters: the payload after it is plain single-byte ASCII
+ test_detection(&test_data, "utf-16be");
+ }
+
+ #[test]
+ fn test_decoding_unsupported_charset() { // an unknown label must be rejected before any decoding happens
+ let test_data = Vec::new();
+ let result = convert_to_utf8(&test_data, "utf-32le"); // "utf-32le" is expected not to resolve to an encoding
+ assert!(result.is_err());
+ let err = result.expect_err("Err expected");
+ assert!(err.kind() == ErrorKind::InvalidInput);
+ }
+
+ #[test]
+ fn test_decoding_invalid_utf8() { // malformed input must be an error, not replacement characters
+ let test_data = b"\xFE\xFE\xFF\xFF".to_vec(); // 0xFE and 0xFF never occur in well-formed UTF-8
+ let result = convert_to_utf8(&test_data, "utf-8");
+ assert!(result.is_err());
+ let err = result.expect_err("Err expected");
+ assert!(err.kind() == ErrorKind::InvalidData);
+ }
+}
diff --git a/cli/tsc.rs b/cli/tsc.rs
index fb25df8d5..41128948b 100644
--- a/cli/tsc.rs
+++ b/cli/tsc.rs
@@ -471,7 +471,7 @@ impl TsCompiler {
if let Some(metadata) = self.get_metadata(&url) {
// Compare version hashes
let version_hash_to_validate = source_code_version_hash(
- &source_file.source_code,
+ &source_file.source_code.as_bytes(),
version::DENO,
&self.config.hash,
);
@@ -512,7 +512,7 @@ impl TsCompiler {
.fetch_cached_source_file(&specifier, Permissions::allow_all())
{
let existing_hash = crate::checksum::gen(&[
- &source_file.source_code,
+ &source_file.source_code.as_bytes(),
version::DENO.as_bytes(),
]);
let expected_hash =
@@ -851,9 +851,7 @@ impl TsCompiler {
let compiled_source_file = self.get_compiled_source_file(module_url)?;
let compiled_module = CompiledModule {
- code: str::from_utf8(&compiled_source_file.source_code)
- .unwrap()
- .to_string(),
+ code: compiled_source_file.source_code.to_string()?,
name: module_url.to_string(),
};
@@ -861,8 +859,8 @@ impl TsCompiler {
}
/// Return compiled JS file for given TS module.
- // TODO: ideally we shouldn't construct SourceFile by hand, but it should be delegated to
- // SourceFileFetcher
+ // TODO: ideally we shouldn't construct SourceFile by hand, but it should be
+ // delegated to SourceFileFetcher.
pub fn get_compiled_source_file(
&self,
module_url: &Url,
@@ -878,7 +876,7 @@ impl TsCompiler {
url: module_url.clone(),
filename: compiled_code_filename,
media_type: msg::MediaType::JavaScript,
- source_code: compiled_code,
+ source_code: compiled_code.into(),
types_header: None,
};
@@ -902,7 +900,7 @@ impl TsCompiler {
self.mark_compiled(module_specifier.as_url());
let version_hash = source_code_version_hash(
- &source_file.source_code,
+ &source_file.source_code.as_bytes(),
version::DENO,
&self.config.hash,
);
@@ -935,7 +933,7 @@ impl TsCompiler {
url: module_specifier.as_url().to_owned(),
filename: source_map_filename,
media_type: msg::MediaType::JavaScript,
- source_code,
+ source_code: source_code.into(),
types_header: None,
};
@@ -981,7 +979,7 @@ impl SourceMapGetter for TsCompiler {
self
.try_resolve_and_get_source_file(script_name)
.and_then(|out| {
- str::from_utf8(&out.source_code).ok().map(|v| {
+ out.source_code.to_str().ok().map(|v| {
// Do NOT use .lines(): it skips the terminating empty line.
// (due to internally using .split_terminator() instead of .split())
let lines: Vec<&str> = v.split('\n').collect();
@@ -1020,7 +1018,7 @@ impl TsCompiler {
) -> Option<Vec<u8>> {
if let Some(module_specifier) = self.try_to_resolve(script_name) {
return match self.get_source_map_file(&module_specifier) {
- Ok(out) => Some(out.source_code),
+ Ok(out) => Some(out.source_code.into_bytes()),
Err(_) => {
// Check if map is inlined
if let Ok(compiled_source) =
@@ -1566,7 +1564,7 @@ mod tests {
url: specifier.as_url().clone(),
filename: PathBuf::from(p.to_str().unwrap().to_string()),
media_type: msg::MediaType::TypeScript,
- source_code: include_bytes!("./tests/002_hello.ts").to_vec(),
+ source_code: include_bytes!("./tests/002_hello.ts").to_vec().into(),
types_header: None,
};
let dir =
@@ -1642,7 +1640,7 @@ mod tests {
url: specifier.as_url().clone(),
filename: PathBuf::from(p.to_str().unwrap().to_string()),
media_type: msg::MediaType::TypeScript,
- source_code: include_bytes!("./tests/002_hello.ts").to_vec(),
+ source_code: include_bytes!("./tests/002_hello.ts").to_vec().into(),
types_header: None,
};
let dir =