feat(ops): Fast zero copy string arguments (#16777)

Uses the SeqOneByteString optimization to do zero-copy `&str` arguments in fast calls.

- [x] Depends on https://github.com/denoland/rusty_v8/pull/1129
- [x] Depends on https://chromium-review.googlesource.com/c/v8/v8/+/4036884
- [x] Disable in async ops
- [x] Make it work with owned `String` with an extra alloc in fast path.
- [x] Support `Cow<'_, str>`. Owned for slow case, Borrowed for fast case

```rust
#[op]
fn op_string_len(s: &str) -> u32 {
  s.len() as u32
}
```
author: Divy Srivastava <dj.srivastava23@gmail.com> 2022-12-01 21:29:15 -0800
committer: GitHub <noreply@github.com> 2022-12-02 05:29:15 +0000
commit: 9b2b8df927ac23cfa99016a684179f2a3198ba2e (patch)
tree: 1d13b575bc7c4f7279b2ff3fdde175a7522d643a /ext/web/lib.rs
parent: 075854e5162c3d9f4fd7061d19acbe2c5855536e (diff)
1 files changed, 35 insertions, 17 deletions
diff --git a/ext/web/lib.rs b/ext/web/lib.rs
index cfbcee6e3..7c75a9a28 100644
--- a/ext/web/lib.rs
+++ b/ext/web/lib.rs
@@ -270,7 +270,7 @@ fn op_encoding_decode_single(
 #[op]
 fn op_encoding_new_decoder(
   state: &mut OpState,
-  label: String,
+  label: &str,
   fatal: bool,
   ignore_bom: bool,
 ) -> Result<ResourceId, AnyError> {
@@ -352,25 +352,43 @@ impl Resource for TextDecoderResource {
   }
 }
 
-#[op(v8)]
+#[op]
 fn op_encoding_encode_into(
-  scope: &mut v8::HandleScope,
-  input: serde_v8::Value,
+  input: Cow<'_, str>,
   buffer: &mut [u8],
   out_buf: &mut [u32],
-) -> Result<(), AnyError> {
-  let s = v8::Local::<v8::String>::try_from(input.v8_value)?;
-
-  let mut nchars = 0;
-  out_buf[1] = s.write_utf8(
-    scope,
-    buffer,
-    Some(&mut nchars),
-    v8::WriteOptions::NO_NULL_TERMINATION
-      | v8::WriteOptions::REPLACE_INVALID_UTF8,
-  ) as u32;
-  out_buf[0] = nchars as u32;
-  Ok(())
+) {
+  // Since `input` is already UTF-8, we can simply find the last UTF-8 code
+  // point boundary from input that fits in `buffer`, and copy the bytes up to
+  // that point.
+  let boundary = if buffer.len() >= input.len() {
+    input.len()
+  } else {
+    let mut boundary = buffer.len();
+
+    // The maximum length of a UTF-8 code point is 4 bytes.
+    for _ in 0..4 {
+      if input.is_char_boundary(boundary) {
+        break;
+      }
+      debug_assert!(boundary > 0);
+      boundary -= 1;
+    }
+
+    debug_assert!(input.is_char_boundary(boundary));
+    boundary
+  };
+
+  buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
+
+  // The `read` output parameter is measured in UTF-16 code units.
+  out_buf[0] = match input {
+    // Borrowed Cow strings are zero-copy views into the V8 heap.
+    // Thus, they are guaranteed to be SeqOneByteString.
+    Cow::Borrowed(v) => v[..boundary].len() as u32,
+    Cow::Owned(v) => v[..boundary].encode_utf16().count() as u32,
+  };
+  out_buf[1] = boundary as u32;
 }
 
 #[op(v8)]
author	Divy Srivastava <dj.srivastava23@gmail.com>	2022-12-01 21:29:15 -0800
committer	GitHub <noreply@github.com>	2022-12-02 05:29:15 +0000
commit	9b2b8df927ac23cfa99016a684179f2a3198ba2e (patch)
tree	1d13b575bc7c4f7279b2ff3fdde175a7522d643a /ext/web/lib.rs
parent	075854e5162c3d9f4fd7061d19acbe2c5855536e (diff)