diff options
author | Luca Casonato <hello@lcas.dev> | 2024-06-08 18:36:13 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-08 18:36:13 +0200 |
commit | c1f23c578881b85ae79b524a60160d8f4fb7151b (patch) | |
tree | c6c945fb4b42cd4ac6fae7135c7ad1039630a34e /ext/fs/interface.rs | |
parent | 22d34f7012c48a25435b38c0c306085c614bbea7 (diff) |
fix(ext/node): lossy UTF-8 read node_modules files (#24140)
Previously various reads of files in `node_modules` would error on
invalid UTF-8. These were cases involving:
- reading package.json from Rust
- reading package.json from JS
- reading CommonJS files from JS
- reading CommonJS files from Rust (for ESM translation)
- reading ESM files from Rust
Diffstat (limited to 'ext/fs/interface.rs')
-rw-r--r-- | ext/fs/interface.rs | 25 |
1 files changed, 17 insertions, 8 deletions
diff --git a/ext/fs/interface.rs b/ext/fs/interface.rs index 70f9fdf63..5031dc134 100644 --- a/ext/fs/interface.rs +++ b/ext/fs/interface.rs @@ -1,5 +1,6 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. +use std::borrow::Cow; use std::path::Path; use std::path::PathBuf; use std::rc::Rc; @@ -284,24 +285,32 @@ pub trait FileSystem: std::fmt::Debug + MaybeSend + MaybeSync { self.stat_sync(path).is_ok() } - fn read_text_file_sync( + fn read_text_file_lossy_sync( &self, path: &Path, access_check: Option<AccessCheckCb>, ) -> FsResult<String> { let buf = self.read_file_sync(path, access_check)?; - String::from_utf8(buf).map_err(|err| { - std::io::Error::new(std::io::ErrorKind::InvalidData, err).into() - }) + Ok(string_from_utf8_lossy(buf)) } - async fn read_text_file_async<'a>( + async fn read_text_file_lossy_async<'a>( &'a self, path: PathBuf, access_check: Option<AccessCheckCb<'a>>, ) -> FsResult<String> { let buf = self.read_file_async(path, access_check).await?; - String::from_utf8(buf).map_err(|err| { - std::io::Error::new(std::io::ErrorKind::InvalidData, err).into() - }) + Ok(string_from_utf8_lossy(buf)) + } +} + +// Like String::from_utf8_lossy but operates on owned values +#[inline(always)] +fn string_from_utf8_lossy(buf: Vec<u8>) -> String { + match String::from_utf8_lossy(&buf) { + // buf contained non-utf8 chars that have been patched + Cow::Owned(s) => s, + // SAFETY: if Borrowed then the buf only contains utf8 chars, + // we do this instead of .into_owned() to avoid copying the input buf + Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(buf) }, } } |