summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca Casonato <hello@lcas.dev>2024-06-08 18:36:13 +0200
committerGitHub <noreply@github.com>2024-06-08 18:36:13 +0200
commitc1f23c578881b85ae79b524a60160d8f4fb7151b (patch)
treec6c945fb4b42cd4ac6fae7135c7ad1039630a34e
parent22d34f7012c48a25435b38c0c306085c614bbea7 (diff)
fix(ext/node): lossy UTF-8 read node_modules files (#24140)
Previously various reads of files in `node_modules` would error on invalid UTF-8. These were cases involving: - reading package.json from Rust - reading package.json from JS - reading CommonJS files from JS - reading CommonJS files from Rust (for ESM translation) - reading ESM files from Rust
-rw-r--r--cli/node.rs2
-rw-r--r--cli/resolver.rs7
-rw-r--r--cli/util/gitignore.rs2
-rw-r--r--ext/fs/interface.rs25
-rw-r--r--ext/fs/ops.rs25
-rw-r--r--ext/node/ops/require.rs2
-rw-r--r--ext/node/package_json.rs2
-rw-r--r--tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/index.js1
-rw-r--r--tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/package.json6
-rw-r--r--tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/index.js1
-rw-r--r--tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/package.json7
-rw-r--r--tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/index.js1
-rw-r--r--tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/package.json6
-rw-r--r--tests/specs/npm/lossy_utf8_module/__test__.jsonc5
-rw-r--r--tests/specs/npm/lossy_utf8_module/main.mjs3
-rw-r--r--tests/specs/npm/lossy_utf8_module/main.out3
-rw-r--r--tests/specs/npm/lossy_utf8_package_json/__test__.jsonc5
-rw-r--r--tests/specs/npm/lossy_utf8_package_json/main.mjs3
-rw-r--r--tests/specs/npm/lossy_utf8_package_json/main.out3
-rw-r--r--tests/specs/npm/lossy_utf8_script/__test__.jsonc5
-rw-r--r--tests/specs/npm/lossy_utf8_script/main.mjs3
-rw-r--r--tests/specs/npm/lossy_utf8_script/main.out3
-rw-r--r--tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc6
-rw-r--r--tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs10
-rw-r--r--tests/specs/npm/lossy_utf8_script_from_cjs/main.out4
-rw-r--r--tests/util/server/src/npm.rs5
26 files changed, 112 insertions, 33 deletions
diff --git a/cli/node.rs b/cli/node.rs
index c696fcac9..5ecbacdc7 100644
--- a/cli/node.rs
+++ b/cli/node.rs
@@ -125,7 +125,7 @@ impl CjsCodeAnalyzer for CliCjsCodeAnalyzer {
None => {
self
.fs
- .read_text_file_async(specifier.to_file_path().unwrap(), None)
+ .read_text_file_lossy_async(specifier.to_file_path().unwrap(), None)
.await?
}
};
diff --git a/cli/resolver.rs b/cli/resolver.rs
index 301cd0666..3edc6f429 100644
--- a/cli/resolver.rs
+++ b/cli/resolver.rs
@@ -320,7 +320,12 @@ impl NpmModuleLoader {
let code = if self.cjs_resolutions.contains(specifier) {
// translate cjs to esm if it's cjs and inject node globals
- let code = String::from_utf8(code)?;
+ let code = match String::from_utf8_lossy(&code) {
+ Cow::Owned(code) => code,
+ // SAFETY: `String::from_utf8_lossy` guarantees that the result is valid
+ // UTF-8 if `Cow::Borrowed` is returned.
+ Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(code) },
+ };
ModuleSourceCode::String(
self
.node_code_translator
diff --git a/cli/util/gitignore.rs b/cli/util/gitignore.rs
index 12a450d64..4538e0912 100644
--- a/cli/util/gitignore.rs
+++ b/cli/util/gitignore.rs
@@ -105,7 +105,7 @@ impl GitIgnoreTree {
});
let current = self
.fs
- .read_text_file_sync(&dir_path.join(".gitignore"), None)
+ .read_text_file_lossy_sync(&dir_path.join(".gitignore"), None)
.ok()
.and_then(|text| {
let mut builder = ignore::gitignore::GitignoreBuilder::new(dir_path);
diff --git a/ext/fs/interface.rs b/ext/fs/interface.rs
index 70f9fdf63..5031dc134 100644
--- a/ext/fs/interface.rs
+++ b/ext/fs/interface.rs
@@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+use std::borrow::Cow;
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
@@ -284,24 +285,32 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
self.stat_sync(path).is_ok()
}
- fn read_text_file_sync(
+ fn read_text_file_lossy_sync(
&self,
path: &Path,
access_check: Option<AccessCheckCb>,
) -> FsResult<String> {
let buf = self.read_file_sync(path, access_check)?;
- String::from_utf8(buf).map_err(|err| {
- std::io::Error::new(std::io::ErrorKind::InvalidData, err).into()
- })
+ Ok(string_from_utf8_lossy(buf))
}
- async fn read_text_file_async<'a>(
+ async fn read_text_file_lossy_async<'a>(
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<String> {
let buf = self.read_file_async(path, access_check).await?;
- String::from_utf8(buf).map_err(|err| {
- std::io::Error::new(std::io::ErrorKind::InvalidData, err).into()
- })
+ Ok(string_from_utf8_lossy(buf))
+ }
+}
+
+// Like String::from_utf8_lossy but operates on owned values
+#[inline(always)]
+fn string_from_utf8_lossy(buf: Vec<u8>) -> String {
+ match String::from_utf8_lossy(&buf) {
+ // buf contained non-utf8 chars than have been patched
+ Cow::Owned(s) => s,
+ // SAFETY: if Borrowed then the buf only contains utf8 chars,
+ // we do this instead of .into_owned() to avoid copying the input buf
+ Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) },
}
}
diff --git a/ext/fs/ops.rs b/ext/fs/ops.rs
index 8e715d825..57b0e0b9e 100644
--- a/ext/fs/ops.rs
+++ b/ext/fs/ops.rs
@@ -1,6 +1,5 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
-use std::borrow::Cow;
use std::cell::RefCell;
use std::io;
use std::io::SeekFrom;
@@ -1333,11 +1332,11 @@ where
let fs = state.borrow::<FileSystemRc>().clone();
let mut access_check =
sync_permission_check::<P>(state.borrow_mut(), "Deno.readFileSync()");
- let buf = fs
- .read_file_sync(&path, Some(&mut access_check))
+ let str = fs
+ .read_text_file_lossy_sync(&path, Some(&mut access_check))
.map_err(|error| map_permission_error("readfile", error, &path))?;
- Ok(string_from_utf8_lossy(buf))
+ Ok(str)
}
#[op2(async)]
@@ -1361,9 +1360,10 @@ where
(state.borrow::<FileSystemRc>().clone(), cancel_handle)
};
- let fut = fs.read_file_async(path.clone(), Some(&mut access_check));
+ let fut =
+ fs.read_text_file_lossy_async(path.clone(), Some(&mut access_check));
- let buf = if let Some(cancel_handle) = cancel_handle {
+ let str = if let Some(cancel_handle) = cancel_handle {
let res = fut.or_cancel(cancel_handle).await;
if let Some(cancel_rid) = cancel_rid {
@@ -1379,18 +1379,7 @@ where
.map_err(|error| map_permission_error("readfile", error, &path))?
};
- Ok(string_from_utf8_lossy(buf))
-}
-
-// Like String::from_utf8_lossy but operates on owned values
-fn string_from_utf8_lossy(buf: Vec<u8>) -> String {
- match String::from_utf8_lossy(&buf) {
- // buf contained non-utf8 chars than have been patched
- Cow::Owned(s) => s,
- // SAFETY: if Borrowed then the buf only contains utf8 chars,
- // we do this instead of .into_owned() to avoid copying the input buf
- Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) },
- }
+ Ok(str)
}
fn to_seek_from(offset: i64, whence: i32) -> Result<SeekFrom, AnyError> {
diff --git a/ext/node/ops/require.rs b/ext/node/ops/require.rs
index de2687001..3e1f1c6ae 100644
--- a/ext/node/ops/require.rs
+++ b/ext/node/ops/require.rs
@@ -451,7 +451,7 @@ where
let file_path = PathBuf::from(file_path);
ensure_read_permission::<P>(state, &file_path)?;
let fs = state.borrow::<FileSystemRc>();
- Ok(fs.read_text_file_sync(&file_path, None)?)
+ Ok(fs.read_text_file_lossy_sync(&file_path, None)?)
}
#[op2]
diff --git a/ext/node/package_json.rs b/ext/node/package_json.rs
index adae7d634..a19a2d64d 100644
--- a/ext/node/package_json.rs
+++ b/ext/node/package_json.rs
@@ -82,7 +82,7 @@ impl PackageJson {
return Ok(CACHE.with(|cache| cache.borrow()[&path].clone()));
}
- let source = match fs.read_text_file_sync(&path, None) {
+ let source = match fs.read_text_file_lossy_sync(&path, None) {
Ok(source) => source,
Err(err) if err.kind() == ErrorKind::NotFound => {
return Ok(Rc::new(PackageJson::empty(path)));
diff --git a/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/index.js b/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/index.js
new file mode 100644
index 000000000..411f3c372
--- /dev/null
+++ b/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/index.js
@@ -0,0 +1 @@
+export default 'þþÿÿ'; \ No newline at end of file
diff --git a/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/package.json b/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/package.json
new file mode 100644
index 000000000..1260655fb
--- /dev/null
+++ b/tests/registry/npm/@denotest/lossy-utf8-module/1.0.0/package.json
@@ -0,0 +1,6 @@
+{
+ "name": "@denotest/lossy-utf8-script",
+ "version": "1.0.0",
+ "type": "module",
+ "dependencies": {}
+}
diff --git a/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/index.js b/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/index.js
new file mode 100644
index 000000000..34b58e6e1
--- /dev/null
+++ b/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/index.js
@@ -0,0 +1 @@
+export default "hello";
diff --git a/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/package.json b/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/package.json
new file mode 100644
index 000000000..5ca42d0d1
--- /dev/null
+++ b/tests/registry/npm/@denotest/lossy-utf8-package-json/1.0.0/package.json
@@ -0,0 +1,7 @@
+{
+ "name": "@denotest/lossy-utf8-package-json",
+ "version": "1.0.0",
+ "type": "module",
+ "dependencies": {},
+ "files": ["þþÿÿ"]
+} \ No newline at end of file
diff --git a/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/index.js b/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/index.js
new file mode 100644
index 000000000..adbfd382a
--- /dev/null
+++ b/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/index.js
@@ -0,0 +1 @@
+module.exports = 'þþÿÿ'; \ No newline at end of file
diff --git a/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/package.json b/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/package.json
new file mode 100644
index 000000000..e23605945
--- /dev/null
+++ b/tests/registry/npm/@denotest/lossy-utf8-script/1.0.0/package.json
@@ -0,0 +1,6 @@
+{
+ "name": "@denotest/lossy-utf8-script",
+ "version": "1.0.0",
+ "type": "commonjs",
+ "dependencies": {}
+}
diff --git a/tests/specs/npm/lossy_utf8_module/__test__.jsonc b/tests/specs/npm/lossy_utf8_module/__test__.jsonc
new file mode 100644
index 000000000..1ee7d2d6c
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_module/__test__.jsonc
@@ -0,0 +1,5 @@
+{
+ "args": "run main.mjs",
+ "output": "main.out",
+ "exitCode": 0
+}
diff --git a/tests/specs/npm/lossy_utf8_module/main.mjs b/tests/specs/npm/lossy_utf8_module/main.mjs
new file mode 100644
index 000000000..54cfbb16a
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_module/main.mjs
@@ -0,0 +1,3 @@
+import mod from "npm:@denotest/lossy-utf8-module@1.0.0";
+
+console.log(mod);
diff --git a/tests/specs/npm/lossy_utf8_module/main.out b/tests/specs/npm/lossy_utf8_module/main.out
new file mode 100644
index 000000000..0e96f9ebb
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_module/main.out
@@ -0,0 +1,3 @@
+Download http://localhost:4260/@denotest/lossy-utf8-module
+Download http://localhost:4260/@denotest/lossy-utf8-module/1.0.0.tgz
+����
diff --git a/tests/specs/npm/lossy_utf8_package_json/__test__.jsonc b/tests/specs/npm/lossy_utf8_package_json/__test__.jsonc
new file mode 100644
index 000000000..1ee7d2d6c
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_package_json/__test__.jsonc
@@ -0,0 +1,5 @@
+{
+ "args": "run main.mjs",
+ "output": "main.out",
+ "exitCode": 0
+}
diff --git a/tests/specs/npm/lossy_utf8_package_json/main.mjs b/tests/specs/npm/lossy_utf8_package_json/main.mjs
new file mode 100644
index 000000000..9a63eb604
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_package_json/main.mjs
@@ -0,0 +1,3 @@
+import mod from "npm:@denotest/lossy-utf8-package-json@1.0.0";
+
+console.log(mod);
diff --git a/tests/specs/npm/lossy_utf8_package_json/main.out b/tests/specs/npm/lossy_utf8_package_json/main.out
new file mode 100644
index 000000000..99aa5ab61
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_package_json/main.out
@@ -0,0 +1,3 @@
+Download http://localhost:4260/@denotest/lossy-utf8-package-json
+Download http://localhost:4260/@denotest/lossy-utf8-package-json/1.0.0.tgz
+hello
diff --git a/tests/specs/npm/lossy_utf8_script/__test__.jsonc b/tests/specs/npm/lossy_utf8_script/__test__.jsonc
new file mode 100644
index 000000000..1ee7d2d6c
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_script/__test__.jsonc
@@ -0,0 +1,5 @@
+{
+ "args": "run main.mjs",
+ "output": "main.out",
+ "exitCode": 0
+}
diff --git a/tests/specs/npm/lossy_utf8_script/main.mjs b/tests/specs/npm/lossy_utf8_script/main.mjs
new file mode 100644
index 000000000..59fe555aa
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_script/main.mjs
@@ -0,0 +1,3 @@
+import mod from "npm:@denotest/lossy-utf8-script@1.0.0";
+
+console.log(mod);
diff --git a/tests/specs/npm/lossy_utf8_script/main.out b/tests/specs/npm/lossy_utf8_script/main.out
new file mode 100644
index 000000000..180ecdf1c
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_script/main.out
@@ -0,0 +1,3 @@
+Download http://localhost:4260/@denotest/lossy-utf8-script
+Download http://localhost:4260/@denotest/lossy-utf8-script/1.0.0.tgz
+����
diff --git a/tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc b/tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc
new file mode 100644
index 000000000..c8d353de0
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_script_from_cjs/__test__.jsonc
@@ -0,0 +1,6 @@
+{
+ "args": "run --node-modules-dir --allow-read main.mjs",
+ "output": "main.out",
+ "exitCode": 0,
+ "tempDir": true
+}
diff --git a/tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs b/tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs
new file mode 100644
index 000000000..a9e70adfb
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_script_from_cjs/main.mjs
@@ -0,0 +1,10 @@
+import { createRequire } from "node:module";
+
+// Import this so that deno_graph knows to download this file.
+if (false) import("npm:@denotest/lossy-utf8-script@1.0.0");
+
+const require = createRequire(import.meta.url);
+
+const mod = require("@denotest/lossy-utf8-script");
+
+console.log(mod);
diff --git a/tests/specs/npm/lossy_utf8_script_from_cjs/main.out b/tests/specs/npm/lossy_utf8_script_from_cjs/main.out
new file mode 100644
index 000000000..4f062a2ae
--- /dev/null
+++ b/tests/specs/npm/lossy_utf8_script_from_cjs/main.out
@@ -0,0 +1,4 @@
+Download http://localhost:4260/@denotest/lossy-utf8-script
+Download http://localhost:4260/@denotest/lossy-utf8-script/1.0.0.tgz
+Initialize @denotest/lossy-utf8-script@1.0.0
+����
diff --git a/tests/util/server/src/npm.rs b/tests/util/server/src/npm.rs
index 66b7bddcd..e7d8d96ab 100644
--- a/tests/util/server/src/npm.rs
+++ b/tests/util/server/src/npm.rs
@@ -226,10 +226,11 @@ fn get_npm_package(
tarballs.insert(version.clone(), tarball_bytes);
let package_json_path = version_folder.join("package.json");
- let package_json_text = fs::read_to_string(&package_json_path)
- .with_context(|| {
+ let package_json_bytes =
+ fs::read(&package_json_path).with_context(|| {
format!("Error reading package.json at {}", package_json_path)
})?;
+ let package_json_text = String::from_utf8_lossy(&package_json_bytes);
let mut version_info: serde_json::Map<String, serde_json::Value> =
serde_json::from_str(&package_json_text)?;
version_info.insert("dist".to_string(), dist.into());