diff options
author | Divy Srivastava <dj.srivastava23@gmail.com> | 2022-12-01 21:29:15 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-02 05:29:15 +0000 |
commit | 9b2b8df927ac23cfa99016a684179f2a3198ba2e (patch) | |
tree | 1d13b575bc7c4f7279b2ff3fdde175a7522d643a /ext/web/lib.rs | |
parent | 075854e5162c3d9f4fd7061d19acbe2c5855536e (diff) |
feat(ops): Fast zero copy string arguments (#16777)
Uses SeqOneByteString optimization to do zero-copy `&str` arguments in
fast calls.
- [x] Depends on https://github.com/denoland/rusty_v8/pull/1129
- [x] Depends on
https://chromium-review.googlesource.com/c/v8/v8/+/4036884
- [x] Disable in async ops
- [x] Make it work with owned `String` with an extra alloc in fast path.
- [x] Support `Cow<'_, str>`. Owned for slow case, Borrowed for fast
case
```rust
#[op]
// Returns the length of `s` in UTF-8 bytes (not characters or UTF-16 units).
fn op_string_len(s: &str) -> u32 {
  s.len() as u32
}
```
Diffstat (limited to 'ext/web/lib.rs')
-rw-r--r-- | ext/web/lib.rs | 52 |
1 files changed, 35 insertions, 17 deletions
diff --git a/ext/web/lib.rs b/ext/web/lib.rs index cfbcee6e3..7c75a9a28 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -270,7 +270,7 @@ fn op_encoding_decode_single( #[op] fn op_encoding_new_decoder( state: &mut OpState, - label: String, + label: &str, fatal: bool, ignore_bom: bool, ) -> Result<ResourceId, AnyError> { @@ -352,25 +352,43 @@ impl Resource for TextDecoderResource { } } -#[op(v8)] +#[op] fn op_encoding_encode_into( - scope: &mut v8::HandleScope, - input: serde_v8::Value, + input: Cow<'_, str>, buffer: &mut [u8], out_buf: &mut [u32], -) -> Result<(), AnyError> { - let s = v8::Local::<v8::String>::try_from(input.v8_value)?; - - let mut nchars = 0; - out_buf[1] = s.write_utf8( - scope, - buffer, - Some(&mut nchars), - v8::WriteOptions::NO_NULL_TERMINATION - | v8::WriteOptions::REPLACE_INVALID_UTF8, - ) as u32; - out_buf[0] = nchars as u32; - Ok(()) +) { + // Since `input` is already UTF-8, we can simply find the last UTF-8 code + // point boundary from input that fits in `buffer`, and copy the bytes up to + // that point. + let boundary = if buffer.len() >= input.len() { + input.len() + } else { + let mut boundary = buffer.len(); + + // The maximum length of a UTF-8 code point is 4 bytes. + for _ in 0..4 { + if input.is_char_boundary(boundary) { + break; + } + debug_assert!(boundary > 0); + boundary -= 1; + } + + debug_assert!(input.is_char_boundary(boundary)); + boundary + }; + + buffer[..boundary].copy_from_slice(input[..boundary].as_bytes()); + + // The `read` output parameter is measured in UTF-16 code units. + out_buf[0] = match input { + // Borrowed Cow strings are zero-copy views into the V8 heap. + // Thus, they are guarantee to be SeqOneByteString. + Cow::Borrowed(v) => v[..boundary].len() as u32, + Cow::Owned(v) => v[..boundary].encode_utf16().count() as u32, + }; + out_buf[1] = boundary as u32; } #[op(v8)] |