summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .dprint.json 1
-rw-r--r-- cli/ast/mod.rs 28
-rw-r--r-- cli/tests/integration/run_tests.rs 5
-rw-r--r-- cli/tests/testdata/byte_order_mark.out 1
-rw-r--r-- cli/tests/testdata/byte_order_mark.ts 4
-rw-r--r-- cli/text_encoding.rs 11
-rw-r--r-- cli/tools/fmt.rs 11
7 files changed, 44 insertions, 17 deletions
diff --git a/.dprint.json b/.dprint.json
index 91c827fc1..a0f502b94 100644
--- a/.dprint.json
+++ b/.dprint.json
@@ -25,6 +25,7 @@
"cli/tests/testdata/inline_js_source_map*",
"cli/tests/testdata/badly_formatted.md",
"cli/tests/testdata/badly_formatted.json",
+ "cli/tests/testdata/byte_order_mark.ts",
"cli/tsc/*typescript.js",
"test_util/std",
"test_util/wpt",
diff --git a/cli/ast/mod.rs b/cli/ast/mod.rs
index 15414ba8e..232db1305 100644
--- a/cli/ast/mod.rs
+++ b/cli/ast/mod.rs
@@ -2,6 +2,7 @@
use crate::config_file;
use crate::media_type::MediaType;
+use crate::text_encoding::strip_bom;
use deno_core::error::AnyError;
use deno_core::resolve_url_or_path;
@@ -392,10 +393,15 @@ pub fn parse(
source: &str,
media_type: &MediaType,
) -> Result<ParsedModule, AnyError> {
+ let source = strip_bom(source);
let info = SourceFileInfo::new(specifier, source);
let input =
StringInput::new(source, BytePos(0), BytePos(source.len() as u32));
- let (comments, module) = parse_string_input(&info, input, media_type)?;
+ let (comments, module) =
+ parse_string_input(input, media_type).map_err(|err| Diagnostic {
+ location: info.get_location(err.span().lo),
+ message: err.into_kind().msg().to_string(),
+ })?;
Ok(ParsedModule {
info: Arc::new(info),
@@ -468,13 +474,17 @@ pub fn transpile_module(
globals: &Globals,
cm: Rc<SourceMap>,
) -> Result<(Rc<SourceFile>, Module), AnyError> {
- let info = SourceFileInfo::new(specifier, source);
+ let source = strip_bom(source);
let source_file = cm.new_source_file(
FileName::Custom(specifier.to_string()),
source.to_string(),
);
let input = StringInput::from(&*source_file);
- let (comments, module) = parse_string_input(&info, input, media_type)?;
+ let (comments, module) =
+ parse_string_input(input, media_type).map_err(|err| Diagnostic {
+ location: cm.lookup_char_pos(err.span().lo).into(),
+ message: err.into_kind().msg().to_string(),
+ })?;
let jsx_pass = react::react(
cm,
@@ -511,19 +521,17 @@ pub fn transpile_module(
}
fn parse_string_input(
- info: &SourceFileInfo,
input: StringInput,
media_type: &MediaType,
-) -> Result<(SingleThreadedComments, Module), AnyError> {
+) -> Result<
+ (SingleThreadedComments, Module),
+ swc_ecmascript::parser::error::Error,
+> {
let syntax = get_syntax(media_type);
let comments = SingleThreadedComments::default();
let lexer = Lexer::new(syntax, TARGET, input, Some(&comments));
let mut parser = swc_ecmascript::parser::Parser::new_from(lexer);
-
- let module = parser.parse_module().map_err(|err| Diagnostic {
- location: info.get_location(err.span().lo),
- message: err.into_kind().msg().to_string(),
- })?;
+ let module = parser.parse_module()?;
Ok((comments, module))
}
diff --git a/cli/tests/integration/run_tests.rs b/cli/tests/integration/run_tests.rs
index f5ac82e9c..c7e0325ec 100644
--- a/cli/tests/integration/run_tests.rs
+++ b/cli/tests/integration/run_tests.rs
@@ -1788,3 +1788,8 @@ itest!(tls_connecttls {
args: "run --quiet --reload --allow-net --allow-read --cert tls/RootCA.pem tls_connecttls.js",
output: "tls.out",
});
+
+itest!(byte_order_mark {
+ args: "run --no-check byte_order_mark.ts",
+ output: "byte_order_mark.out",
+});
diff --git a/cli/tests/testdata/byte_order_mark.out b/cli/tests/testdata/byte_order_mark.out
new file mode 100644
index 000000000..557db03de
--- /dev/null
+++ b/cli/tests/testdata/byte_order_mark.out
@@ -0,0 +1 @@
+Hello World
diff --git a/cli/tests/testdata/byte_order_mark.ts b/cli/tests/testdata/byte_order_mark.ts
new file mode 100644
index 000000000..40eb23c1d
--- /dev/null
+++ b/cli/tests/testdata/byte_order_mark.ts
@@ -0,0 +1,4 @@
+import "./001_hello.js";
+// Note this file starts with special byte order mark <U+FEFF>
+// it's important that this file is a .ts typescript file which is passed to
+// deno through `--no-check` mode.
diff --git a/cli/text_encoding.rs b/cli/text_encoding.rs
index 8d316909c..f61b877dc 100644
--- a/cli/text_encoding.rs
+++ b/cli/text_encoding.rs
@@ -6,6 +6,8 @@ use std::{
io::{Error, ErrorKind},
};
+pub const BOM_CHAR: char = '\u{FEFF}';
+
/// Attempts to detect the character encoding of the provided bytes.
///
/// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian.
@@ -43,6 +45,15 @@ pub fn convert_to_utf8<'a>(
}
}
+/// Strips the byte order mark from the provided text if it exists.
+pub fn strip_bom(text: &str) -> &str {
+ if text.starts_with(BOM_CHAR) {
+ &text[BOM_CHAR.len_utf8()..]
+ } else {
+ text
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
diff --git a/cli/tools/fmt.rs b/cli/tools/fmt.rs
index a02b86b17..33c3599d7 100644
--- a/cli/tools/fmt.rs
+++ b/cli/tools/fmt.rs
@@ -28,8 +28,6 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
-const BOM_CHAR: char = '\u{FEFF}';
-
/// Format JavaScript/TypeScript files.
pub async fn format(
args: Vec<PathBuf>,
@@ -350,12 +348,11 @@ fn read_file_contents(file_path: &Path) -> Result<FileContents, AnyError> {
let file_bytes = fs::read(&file_path)?;
let charset = text_encoding::detect_charset(&file_bytes);
let file_text = text_encoding::convert_to_utf8(&file_bytes, charset)?;
- let had_bom = file_text.starts_with(BOM_CHAR);
+ let had_bom = file_text.starts_with(text_encoding::BOM_CHAR);
let text = if had_bom {
- // remove the BOM
- String::from(&file_text[BOM_CHAR.len_utf8()..])
+ text_encoding::strip_bom(&file_text).to_string()
} else {
- String::from(file_text)
+ file_text.to_string()
};
Ok(FileContents { text, had_bom })
@@ -367,7 +364,7 @@ fn write_file_contents(
) -> Result<(), AnyError> {
let file_text = if file_contents.had_bom {
// add back the BOM
- format!("{}{}", BOM_CHAR, file_contents.text)
+ format!("{}{}", text_encoding::BOM_CHAR, file_contents.text)
} else {
file_contents.text
};