3 files changed, 74 insertions, 28 deletions
diff --git a/ext/web/08_text_encoding.js b/ext/web/08_text_encoding.js
index 4477d9b9e..bf4b33808 100644
--- a/ext/web/08_text_encoding.js
+++ b/ext/web/08_text_encoding.js
@@ -16,14 +16,14 @@
   const ops = core.ops;
   const webidl = window.__bootstrap.webidl;
   const {
-    ArrayBufferIsView,
-    ObjectPrototypeIsPrototypeOf,
     PromiseReject,
     PromiseResolve,
     StringPrototypeCharCodeAt,
     StringPrototypeSlice,
     TypedArrayPrototypeSubarray,
     Uint8Array,
+    ObjectPrototypeIsPrototypeOf,
+    ArrayBufferIsView,
     Uint32Array,
   } = window.__bootstrap.primordials;
 
@@ -34,6 +34,8 @@
     #fatal;
     /** @type {boolean} */
     #ignoreBOM;
+    /** @type {boolean} */
+    #utf8SinglePass;
 
     /** @type {number | null} */
     #rid = null;
@@ -56,6 +58,7 @@
       this.#encoding = encoding;
       this.#fatal = options.fatal;
       this.#ignoreBOM = options.ignoreBOM;
+      this.#utf8SinglePass = encoding === "utf-8" && !options.fatal;
       this[webidl.brand] = webidl.brand;
     }
 
@@ -81,7 +84,7 @@
      * @param {BufferSource} [input]
      * @param {TextDecodeOptions} options
      */
-    decode(input = new Uint8Array(), options = {}) {
+    decode(input = new Uint8Array(), options = undefined) {
       webidl.assertBranded(this, TextDecoderPrototype);
       const prefix = "Failed to execute 'decode' on 'TextDecoder'";
       if (input !== undefined) {
@@ -91,40 +94,46 @@
           allowShared: true,
         });
       }
-      options = webidl.converters.TextDecodeOptions(options, {
-        prefix,
-        context: "Argument 2",
-      });
+      let stream = false;
+      if (options !== undefined) {
+        options = webidl.converters.TextDecodeOptions(options, {
+          prefix,
+          context: "Argument 2",
+        });
+        stream = options.stream;
+      }
 
       try {
-        try {
-          if (ArrayBufferIsView(input)) {
-            input = new Uint8Array(
-              input.buffer,
-              input.byteOffset,
-              input.byteLength,
-            );
-          } else {
-            input = new Uint8Array(input);
-          }
-        } catch {
-          // If the buffer is detached, just create a new empty Uint8Array.
-          input = new Uint8Array();
-        }
+        // Note from spec: implementations are strongly encouraged to use an implementation strategy that avoids this copy.
+        // When doing so they will have to make sure that changes to input do not affect future calls to decode().
         if (
           ObjectPrototypeIsPrototypeOf(
             SharedArrayBuffer.prototype,
-            input.buffer,
+            input || input.buffer,
           )
         ) {
           // We clone the data into a non-shared ArrayBuffer so we can pass it
           // to Rust.
           // `input` is now a Uint8Array, and calling the TypedArray constructor
           // with a TypedArray argument copies the data.
-          input = new Uint8Array(input);
+          if (ArrayBufferIsView(input)) {
+            input = new Uint8Array(
+              input.buffer,
+              input.byteOffset,
+              input.byteLength,
+            );
+          } else {
+            input = new Uint8Array(input);
+          }
         }
 
-        if (!options.stream && this.#rid === null) {
+        // Fast path for single pass decoding.
+        if (!stream && this.#rid === null) {
+          // Fast path for utf8 single pass decoding.
+          if (this.#utf8SinglePass) {
+            return ops.op_encoding_decode_utf8(input, this.#ignoreBOM);
+          }
+
           return ops.op_encoding_decode_single(
             input,
             this.#encoding,
@@ -140,9 +149,9 @@
             this.#ignoreBOM,
           );
         }
-        return ops.op_encoding_decode(input, this.#rid, options.stream);
+        return ops.op_encoding_decode(input, this.#rid, stream);
       } finally {
-        if (!options.stream && this.#rid !== null) {
+        if (!stream && this.#rid !== null) {
           core.close(this.#rid);
           this.#rid = null;
         }
diff --git a/ext/web/lib.rs b/ext/web/lib.rs
index 588a3adfd..f799f02e7 100644
--- a/ext/web/lib.rs
+++ b/ext/web/lib.rs
@@ -91,6 +91,7 @@ pub fn init<P: TimersPermission + 'static>(
       op_base64_btoa::decl(),
       op_encoding_normalize_label::decl(),
       op_encoding_decode_single::decl(),
+      op_encoding_decode_utf8::decl(),
       op_encoding_new_decoder::decl(),
       op_encoding_decode::decl(),
       op_encoding_encode_into::decl(),
@@ -179,6 +180,39 @@ fn op_encoding_normalize_label(label: String) -> Result<String, AnyError> {
   Ok(encoding.name().to_lowercase())
 }
 
+/// Decodes `zero_copy` into a V8 string, dropping a leading UTF-8 byte order
+/// mark unless `ignore_bom` is set.
+#[op(v8)]
+fn op_encoding_decode_utf8<'a>(
+  scope: &mut v8::HandleScope<'a>,
+  zero_copy: &[u8],
+  ignore_bom: bool,
+) -> Result<serde_v8::Value<'a>, AnyError> {
+  // The three bytes that make up the UTF-8 byte order mark.
+  const UTF8_BOM: [u8; 3] = [0xef, 0xbb, 0xbf];
+
+  let payload = if !ignore_bom && zero_copy.starts_with(&UTF8_BOM) {
+    &zero_copy[UTF8_BOM.len()..]
+  } else {
+    zero_copy
+  };
+
+  // `String::new_from_utf8()` returning `None` means that the decoded
+  // string would be longer than what V8 can handle. That case is reported
+  // to the caller through the `type_error` built below.
+  // NOTE(review): confirm whether a `RangeError` was intended instead.
+  //
+  // For more details see:
+  // - https://encoding.spec.whatwg.org/#dom-textdecoder-decode
+  // - https://github.com/denoland/deno/issues/6649
+  // - https://github.com/v8/v8/blob/d68fb4733e39525f9ff0a9222107c02c28096e2a/include/v8.h#L3277-L3278
+  let text =
+    v8::String::new_from_utf8(scope, payload, v8::NewStringType::Normal)
+      .ok_or_else(|| type_error("buffer exceeds maximum length"))?;
+
+  Ok(serde_v8::from_v8(scope, text.into())?)
+}
+
 #[op]
 fn op_encoding_decode_single(
   data: &[u8],
diff --git a/ops/lib.rs b/ops/lib.rs
index 298327af2..d295ec9bd 100644
--- a/ops/lib.rs
+++ b/ops/lib.rs
@@ -449,13 +449,16 @@ fn codegen_u8_slice(core: &TokenStream2, idx: usize) -> TokenStream2 {
     let value = args.get(#idx as i32);
     match #core::v8::Local::<#core::v8::ArrayBuffer>::try_from(value) {
       Ok(b) => {
+        // Handles detached buffers.
+        let byte_length = b.byte_length();
         let store = b.data() as *mut u8;
         // SAFETY: rust guarantees that lifetime of slice is no longer than the call.
-        unsafe { ::std::slice::from_raw_parts_mut(store, b.byte_length()) }
+        unsafe { ::std::slice::from_raw_parts_mut(store, byte_length) }
       },
       Err(_) => {
         if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) {
-          let (offset, len) = (view.byte_offset(), view.byte_length());
+          let len = view.byte_length();
+          let offset = view.byte_offset();
           let buffer = match view.buffer(scope) {
               Some(v) => v,
               None => {