summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/web/08_text_encoding.js61
-rw-r--r--ext/web/lib.rs34
-rw-r--r--ops/lib.rs7
3 files changed, 74 insertions, 28 deletions
diff --git a/ext/web/08_text_encoding.js b/ext/web/08_text_encoding.js
index 4477d9b9e..bf4b33808 100644
--- a/ext/web/08_text_encoding.js
+++ b/ext/web/08_text_encoding.js
@@ -16,14 +16,14 @@
const ops = core.ops;
const webidl = window.__bootstrap.webidl;
const {
- ArrayBufferIsView,
- ObjectPrototypeIsPrototypeOf,
PromiseReject,
PromiseResolve,
StringPrototypeCharCodeAt,
StringPrototypeSlice,
TypedArrayPrototypeSubarray,
Uint8Array,
+ ObjectPrototypeIsPrototypeOf,
+ ArrayBufferIsView,
Uint32Array,
} = window.__bootstrap.primordials;
@@ -34,6 +34,8 @@
#fatal;
/** @type {boolean} */
#ignoreBOM;
+ /** @type {boolean} */
+ #utf8SinglePass;
/** @type {number | null} */
#rid = null;
@@ -56,6 +58,7 @@
this.#encoding = encoding;
this.#fatal = options.fatal;
this.#ignoreBOM = options.ignoreBOM;
+ this.#utf8SinglePass = encoding === "utf-8" && !options.fatal;
this[webidl.brand] = webidl.brand;
}
@@ -81,7 +84,7 @@
* @param {BufferSource} [input]
* @param {TextDecodeOptions} options
*/
- decode(input = new Uint8Array(), options = {}) {
+ decode(input = new Uint8Array(), options = undefined) {
webidl.assertBranded(this, TextDecoderPrototype);
const prefix = "Failed to execute 'decode' on 'TextDecoder'";
if (input !== undefined) {
@@ -91,40 +94,46 @@
allowShared: true,
});
}
- options = webidl.converters.TextDecodeOptions(options, {
- prefix,
- context: "Argument 2",
- });
+ let stream = false;
+ if (options !== undefined) {
+ options = webidl.converters.TextDecodeOptions(options, {
+ prefix,
+ context: "Argument 2",
+ });
+ stream = options.stream;
+ }
try {
- try {
- if (ArrayBufferIsView(input)) {
- input = new Uint8Array(
- input.buffer,
- input.byteOffset,
- input.byteLength,
- );
- } else {
- input = new Uint8Array(input);
- }
- } catch {
- // If the buffer is detached, just create a new empty Uint8Array.
- input = new Uint8Array();
- }
+ // Note from spec: implementations are strongly encouraged to use an implementation strategy that avoids this copy.
+ // When doing so they will have to make sure that changes to input do not affect future calls to decode().
if (
ObjectPrototypeIsPrototypeOf(
SharedArrayBuffer.prototype,
- input.buffer,
+ input || input.buffer,
)
) {
// We clone the data into a non-shared ArrayBuffer so we can pass it
// to Rust.
// `input` is now a Uint8Array, and calling the TypedArray constructor
// with a TypedArray argument copies the data.
- input = new Uint8Array(input);
+ if (ArrayBufferIsView(input)) {
+ input = new Uint8Array(
+ input.buffer,
+ input.byteOffset,
+ input.byteLength,
+ );
+ } else {
+ input = new Uint8Array(input);
+ }
}
- if (!options.stream && this.#rid === null) {
+ // Fast path for single pass decoding.
+ if (!stream && this.#rid === null) {
+ // Fast path for utf8 single pass decoding.
+ if (this.#utf8SinglePass) {
+ return ops.op_encoding_decode_utf8(input, this.#ignoreBOM);
+ }
+
return ops.op_encoding_decode_single(
input,
this.#encoding,
@@ -140,9 +149,9 @@
this.#ignoreBOM,
);
}
- return ops.op_encoding_decode(input, this.#rid, options.stream);
+ return ops.op_encoding_decode(input, this.#rid, stream);
} finally {
- if (!options.stream && this.#rid !== null) {
+ if (!stream && this.#rid !== null) {
core.close(this.#rid);
this.#rid = null;
}
diff --git a/ext/web/lib.rs b/ext/web/lib.rs
index 588a3adfd..f799f02e7 100644
--- a/ext/web/lib.rs
+++ b/ext/web/lib.rs
@@ -91,6 +91,7 @@ pub fn init<P: TimersPermission + 'static>(
op_base64_btoa::decl(),
op_encoding_normalize_label::decl(),
op_encoding_decode_single::decl(),
+ op_encoding_decode_utf8::decl(),
op_encoding_new_decoder::decl(),
op_encoding_decode::decl(),
op_encoding_encode_into::decl(),
@@ -179,6 +180,39 @@ fn op_encoding_normalize_label(label: String) -> Result<String, AnyError> {
Ok(encoding.name().to_lowercase())
}
+/// Decodes `zero_copy` as UTF-8 into a V8 string in a single pass.
+///
+/// Unless `ignore_bom` is set, a leading UTF-8 byte order mark
+/// (`EF BB BF`) is dropped before the bytes are handed to V8.
+///
+/// # Errors
+///
+/// Returns an error built with `type_error` when V8 cannot create the
+/// string, and propagates any failure from `serde_v8::from_v8`.
+#[op(v8)]
+fn op_encoding_decode_utf8<'a>(
+  scope: &mut v8::HandleScope<'a>,
+  zero_copy: &[u8],
+  ignore_bom: bool,
+) -> Result<serde_v8::Value<'a>, AnyError> {
+  // Skip the byte order mark only when the caller did not ask to keep it.
+  // `strip_prefix` yields `None` for inputs that are shorter than the BOM or
+  // do not start with it, in which case the buffer is used untouched.
+  let buf = if ignore_bom {
+    zero_copy
+  } else {
+    zero_copy.strip_prefix(b"\xef\xbb\xbf").unwrap_or(zero_copy)
+  };
+
+  // If `String::new_from_utf8()` returns `None`, this means that the
+  // length of the decoded string would be longer than what V8 can
+  // handle. In this case we return an error.
+  //
+  // NOTE(review): the error is built with `type_error`, not a range error;
+  // confirm that this is the intended error class.
+  //
+  // For more details see:
+  // - https://encoding.spec.whatwg.org/#dom-textdecoder-decode
+  // - https://github.com/denoland/deno/issues/6649
+  // - https://github.com/v8/v8/blob/d68fb4733e39525f9ff0a9222107c02c28096e2a/include/v8.h#L3277-L3278
+  match v8::String::new_from_utf8(scope, buf, v8::NewStringType::Normal) {
+    Some(text) => Ok(serde_v8::from_v8(scope, text.into())?),
+    None => Err(type_error("buffer exceeds maximum length")),
+  }
+}
+
#[op]
fn op_encoding_decode_single(
data: &[u8],
diff --git a/ops/lib.rs b/ops/lib.rs
index 298327af2..d295ec9bd 100644
--- a/ops/lib.rs
+++ b/ops/lib.rs
@@ -449,13 +449,16 @@ fn codegen_u8_slice(core: &TokenStream2, idx: usize) -> TokenStream2 {
let value = args.get(#idx as i32);
match #core::v8::Local::<#core::v8::ArrayBuffer>::try_from(value) {
Ok(b) => {
+ // Handles detached buffers.
+ let byte_length = b.byte_length();
let store = b.data() as *mut u8;
// SAFETY: rust guarantees that lifetime of slice is no longer than the call.
- unsafe { ::std::slice::from_raw_parts_mut(store, b.byte_length()) }
+ unsafe { ::std::slice::from_raw_parts_mut(store, byte_length) }
},
Err(_) => {
if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) {
- let (offset, len) = (view.byte_offset(), view.byte_length());
+ let len = view.byte_length();
+ let offset = view.byte_offset();
let buffer = match view.buffer(scope) {
Some(v) => v,
None => {