summaryrefslogtreecommitdiff
path: root/ext/fs/interface.rs
diff options
context:
space:
mode:
authorLuca Casonato <hello@lcas.dev>2024-06-08 18:36:13 +0200
committerGitHub <noreply@github.com>2024-06-08 18:36:13 +0200
commitc1f23c578881b85ae79b524a60160d8f4fb7151b (patch)
treec6c945fb4b42cd4ac6fae7135c7ad1039630a34e /ext/fs/interface.rs
parent22d34f7012c48a25435b38c0c306085c614bbea7 (diff)
fix(ext/node): lossy UTF-8 read node_modules files (#24140)
Previously, various reads of files in `node_modules` would error on invalid UTF-8. These were cases involving:
- reading package.json from Rust
- reading package.json from JS
- reading CommonJS files from JS
- reading CommonJS files from Rust (for ESM translation)
- reading ESM files from Rust
Diffstat (limited to 'ext/fs/interface.rs')
-rw-r--r--ext/fs/interface.rs25
1 files changed, 17 insertions, 8 deletions
diff --git a/ext/fs/interface.rs b/ext/fs/interface.rs
index 70f9fdf63..5031dc134 100644
--- a/ext/fs/interface.rs
+++ b/ext/fs/interface.rs
@@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+use std::borrow::Cow;
use std::path::Path;
use std::path::PathBuf;
use std::rc::Rc;
@@ -284,24 +285,32 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync {
self.stat_sync(path).is_ok()
}
- fn read_text_file_sync(
+ fn read_text_file_lossy_sync(
&self,
path: &Path,
access_check: Option<AccessCheckCb>,
) -> FsResult<String> {
let buf = self.read_file_sync(path, access_check)?;
- String::from_utf8(buf).map_err(|err| {
- std::io::Error::new(std::io::ErrorKind::InvalidData, err).into()
- })
+ Ok(string_from_utf8_lossy(buf))
}
- async fn read_text_file_async<'a>(
+ async fn read_text_file_lossy_async<'a>(
&'a self,
path: PathBuf,
access_check: Option<AccessCheckCb<'a>>,
) -> FsResult<String> {
let buf = self.read_file_async(path, access_check).await?;
- String::from_utf8(buf).map_err(|err| {
- std::io::Error::new(std::io::ErrorKind::InvalidData, err).into()
- })
+ Ok(string_from_utf8_lossy(buf))
+ }
+}
+
+// Like String::from_utf8_lossy but operates on owned values
+#[inline(always)]
+fn string_from_utf8_lossy(buf: Vec<u8>) -> String {
+ match String::from_utf8_lossy(&buf) {
+ // buf contained non-utf8 chars that have been patched
+ Cow::Owned(s) => s,
+ // SAFETY: if Borrowed then the buf only contains utf8 chars,
+ // we do this instead of .into_owned() to avoid copying the input buf
+ Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) },
}
}