1 files changed, 27 insertions, 16 deletions
diff --git a/extensions/web/lib.rs b/extensions/web/lib.rs
index 6e3552476..67022c7ea 100644
--- a/extensions/web/lib.rs
+++ b/extensions/web/lib.rs
@@ -298,23 +298,34 @@ fn op_encoding_encode_into(
   input: String,
   mut buffer: ZeroCopyBuf,
 ) -> Result<EncodeIntoResult, AnyError> {
-  let dst: &mut [u8] = &mut buffer;
-  let mut read = 0;
-  let mut written = 0;
-  for char in input.chars() {
-    let len = char.len_utf8();
-    if dst.len() < written + len {
-      break;
+  // Since `input` is already UTF-8, we can simply find the last UTF-8 code
+  // point boundary from input that fits in `buffer`, and copy the bytes up to
+  // that point.
+  let boundary = if buffer.len() >= input.len() {
+    input.len()
+  } else {
+    let mut boundary = buffer.len();
+
+    // The maximum length of a UTF-8 code point is 4 bytes.
+    for _ in 0..4 {
+      if input.is_char_boundary(boundary) {
+        break;
+      }
+      debug_assert!(boundary > 0);
+      boundary -= 1;
     }
-    char.encode_utf8(&mut dst[written..]);
-    written += len;
-    if char > '\u{FFFF}' {
-      read += 2
-    } else {
-      read += 1
-    };
-  }
-  Ok(EncodeIntoResult { read, written })
+
+    debug_assert!(input.is_char_boundary(boundary));
+    boundary
+  };
+
+  buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
+
+  Ok(EncodeIntoResult {
+    // The `read` output parameter is measured in UTF-16 code units.
+    read: input[..boundary].encode_utf16().count(),
+    written: boundary,
+  })
 }
 
 pub fn get_declaration() -> PathBuf {