summary refs log tree commit diff
diff options
context:
space:
mode:
author Andreu Botella <andreu@andreubotella.com> 2022-03-16 00:22:00 +0100
committer GitHub <noreply@github.com> 2022-03-16 00:22:00 +0100
commit 672f66dde1f7ec87282d37e10cac2cdd36e5f181 (patch)
tree aa662ed245efe575841d0a951d8ce36f53b48ed4
parent bb53135ed87ec063c9238e1b7de8cf3b44535685 (diff)
perf(web): Optimize `TextDecoder` by adding a new `U16String` type (#13923)
-rw-r--r-- core/lib.rs 1
-rw-r--r-- ext/web/lib.rs 34
-rw-r--r-- serde_v8/src/lib.rs 1
-rw-r--r-- serde_v8/src/magic/mod.rs 1
-rw-r--r-- serde_v8/src/magic/u16string.rs 62
-rw-r--r-- serde_v8/src/ser.rs 59
-rw-r--r-- serde_v8/src/serializable.rs 5
7 files changed, 147 insertions, 16 deletions
diff --git a/core/lib.rs b/core/lib.rs
index de794b30f..b9391aff6 100644
--- a/core/lib.rs
+++ b/core/lib.rs
@@ -27,6 +27,7 @@ pub use serde_v8;
pub use serde_v8::Buffer as ZeroCopyBuf;
pub use serde_v8::ByteString;
pub use serde_v8::StringOrBuffer;
+pub use serde_v8::U16String;
pub use url;
pub use v8;
diff --git a/ext/web/lib.rs b/ext/web/lib.rs
index 17133e156..75e619dfe 100644
--- a/ext/web/lib.rs
+++ b/ext/web/lib.rs
@@ -16,6 +16,7 @@ use deno_core::Extension;
use deno_core::OpState;
use deno_core::Resource;
use deno_core::ResourceId;
+use deno_core::U16String;
use deno_core::ZeroCopyBuf;
use encoding_rs::CoderResult;
use encoding_rs::Decoder;
@@ -268,7 +269,7 @@ fn op_encoding_decode(
state: &mut OpState,
data: ZeroCopyBuf,
options: DecodeOptions,
-) -> Result<String, AnyError> {
+) -> Result<U16String, AnyError> {
let DecodeOptions { rid, stream } = options;
let resource = state.resource_table.get::<TextDecoderResource>(rid)?;
@@ -276,23 +277,20 @@ fn op_encoding_decode(
let mut decoder = resource.decoder.borrow_mut();
let fatal = resource.fatal;
- let max_buffer_length = if fatal {
- decoder
- .max_utf8_buffer_length_without_replacement(data.len())
- .ok_or_else(|| range_error("Value too large to decode."))?
- } else {
- decoder
- .max_utf8_buffer_length(data.len())
- .ok_or_else(|| range_error("Value too large to decode."))?
- };
+ let max_buffer_length = decoder
+ .max_utf16_buffer_length(data.len())
+ .ok_or_else(|| range_error("Value too large to decode."))?;
- let mut output = String::with_capacity(max_buffer_length);
+ let mut output = U16String::with_zeroes(max_buffer_length);
if fatal {
- let (result, _) =
- decoder.decode_to_string_without_replacement(&data, &mut output, !stream);
+ let (result, _, written) =
+ decoder.decode_to_utf16_without_replacement(&data, &mut output, !stream);
match result {
- DecoderResult::InputEmpty => Ok(output),
+ DecoderResult::InputEmpty => {
+ output.truncate(written);
+ Ok(output)
+ }
DecoderResult::OutputFull => {
Err(range_error("Provided buffer too small."))
}
@@ -301,9 +299,13 @@ fn op_encoding_decode(
}
}
} else {
- let (result, _, _) = decoder.decode_to_string(&data, &mut output, !stream);
+ let (result, _, written, _) =
+ decoder.decode_to_utf16(&data, &mut output, !stream);
match result {
- CoderResult::InputEmpty => Ok(output),
+ CoderResult::InputEmpty => {
+ output.truncate(written);
+ Ok(output)
+ }
CoderResult::OutputFull => Err(range_error("Provided buffer too small.")),
}
}
diff --git a/serde_v8/src/lib.rs b/serde_v8/src/lib.rs
index 9d0715195..203e1d004 100644
--- a/serde_v8/src/lib.rs
+++ b/serde_v8/src/lib.rs
@@ -14,6 +14,7 @@ pub use keys::KeyCache;
pub use magic::buffer::MagicBuffer as Buffer;
pub use magic::bytestring::ByteString;
pub use magic::string_or_buffer::StringOrBuffer;
+pub use magic::u16string::U16String;
pub use magic::Value;
pub use ser::{to_v8, Serializer};
pub use serializable::{Serializable, SerializablePkg};
diff --git a/serde_v8/src/magic/mod.rs b/serde_v8/src/magic/mod.rs
index 941a69d41..e90b5ab60 100644
--- a/serde_v8/src/magic/mod.rs
+++ b/serde_v8/src/magic/mod.rs
@@ -3,6 +3,7 @@ pub mod buffer;
pub mod bytestring;
mod field;
pub mod string_or_buffer;
+pub mod u16string;
mod value;
pub mod zero_copy_buf;
diff --git a/serde_v8/src/magic/u16string.rs b/serde_v8/src/magic/u16string.rs
new file mode 100644
index 000000000..a75af016b
--- /dev/null
+++ b/serde_v8/src/magic/u16string.rs
@@ -0,0 +1,62 @@
+use std::ops::{Deref, DerefMut};
+
+use serde::Serialize;
+
+pub const NAME: &str = "$__v8_magic_u16string";
+pub const FIELD_PTR: &str = "$__v8_magic_u16string_ptr";
+pub const FIELD_LEN: &str = "$__v8_magic_u16string_len";
+
+/// Owned buffer of UTF-16 code units, wrapping a plain `Vec<u16>`.
+///
+/// `Debug` and `Clone` are derived so the type can be printed in diagnostics,
+/// compared with `assert_eq!`, and duplicated when needed.
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
+pub struct U16String(pub Vec<u16>);
+
+impl U16String {
+  /// Creates a buffer holding `length` code units, all set to zero.
+  pub fn with_zeroes(length: usize) -> U16String {
+    U16String(vec![0u16; length])
+  }
+
+  /// Shortens the buffer to at most `new_length` code units, then asks the
+  /// vector to release capacity it no longer needs.
+  ///
+  /// The contents are left untouched when `new_length` is not smaller than
+  /// the current length.
+  pub fn truncate(&mut self, new_length: usize) {
+    self.0.truncate(new_length);
+    self.0.shrink_to_fit();
+  }
+}
+
+impl Deref for U16String {
+  type Target = [u16];
+
+  /// Borrows the stored code units as an immutable slice.
+  fn deref(&self) -> &Self::Target {
+    self.0.as_slice()
+  }
+}
+
+impl DerefMut for U16String {
+  /// Borrows the stored code units as a mutable slice.
+  fn deref_mut(&mut self) -> &mut Self::Target {
+    self.0.as_mut_slice()
+  }
+}
+
+impl AsRef<[u16]> for U16String {
+  /// Views the code units as `&[u16]` without copying.
+  fn as_ref(&self) -> &[u16] {
+    &self.0
+  }
+}
+
+impl AsMut<[u16]> for U16String {
+  /// Views the code units as `&mut [u16]` without copying.
+  fn as_mut(&mut self) -> &mut [u16] {
+    &mut self.0
+  }
+}
+
+impl Serialize for U16String {
+  /// Emits the magic struct `NAME` with two `usize` fields: the address of
+  /// the backing buffer (`FIELD_PTR`) and its length in code units
+  /// (`FIELD_LEN`).
+  ///
+  /// The code units themselves are not written; only the pointer and length
+  /// are handed to the serializer.
+  fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+  where
+    S: serde::Serializer,
+  {
+    use serde::ser::SerializeStruct;
+
+    // The declared field count must match the number of `serialize_field`
+    // calls below: pointer and length, i.e. two.
+    let mut s = serializer.serialize_struct(NAME, 2)?;
+    s.serialize_field(FIELD_PTR, &(self.0.as_ptr() as usize))?;
+    s.serialize_field(FIELD_LEN, &self.0.len())?;
+    s.end()
+  }
+}
+
+// TODO: Deserialize
diff --git a/serde_v8/src/ser.rs b/serde_v8/src/ser.rs
index 8829b9fc3..ce7e8c707 100644
--- a/serde_v8/src/ser.rs
+++ b/serde_v8/src/ser.rs
@@ -339,11 +339,64 @@ impl<'a, 'b, 'c> ser::SerializeStruct
}
}
+/// Struct serializer used for the `U16String` magic name. It records the
+/// pointer and length fields as they are serialized so that `end` can build
+/// the resulting V8 string.
+pub struct MagicU16StringSerializer<'a, 'b, 'c> {
+  scope: ScopePtr<'a, 'b, 'c>,
+  // Start of the UTF-16 buffer; `None` until the pointer field is seen.
+  ptr: Option<std::ptr::NonNull<u16>>,
+  // Number of code units; `None` until the length field is seen.
+  len: Option<usize>,
+}
+
+impl<'a, 'b, 'c> MagicU16StringSerializer<'a, 'b, 'c> {
+  /// Builds a serializer bound to `scope` with neither field captured yet.
+  pub fn new(scope: ScopePtr<'a, 'b, 'c>) -> Self {
+    let (ptr, len) = (None, None);
+    Self { scope, ptr, len }
+  }
+}
+
+impl<'a, 'b, 'c> ser::SerializeStruct for MagicU16StringSerializer<'a, 'b, 'c> {
+  type Ok = JsValue<'a>;
+  type Error = Error;
+
+  /// Records one of the two magic fields of a serialized `U16String`.
+  ///
+  /// `FIELD_PTR` is stored as the buffer pointer and `FIELD_LEN` as the
+  /// number of code units. Any other key hits `unreachable!()` and panics.
+  fn serialize_field<T: ?Sized + Serialize>(
+    &mut self,
+    key: &'static str,
+    value: &T,
+  ) -> Result<()> {
+    // Get u64 chunk
+    let transmuted = value.serialize(magic::FieldSerializer {})?;
+    match key {
+      magic::u16string::FIELD_PTR => {
+        self.ptr = std::ptr::NonNull::new(transmuted as *mut u16)
+      }
+      magic::u16string::FIELD_LEN => self.len = Some(transmuted as usize),
+      _ => unreachable!(),
+    }
+    Ok(())
+  }
+
+  /// Builds a V8 string from the recorded pointer and length.
+  ///
+  /// Returns an error, rather than panicking, when V8 does not produce a
+  /// string. Panics if either field was never serialized.
+  fn end(self) -> JsResult<'a> {
+    let ptr = self.ptr.expect("U16String pointer field was not serialized");
+    let len = self.len.expect("U16String length field was not serialized");
+    // SAFETY: This function is only called from U16String::serialize(), which
+    // guarantees the Vec is still alive.
+    let slice = unsafe { std::slice::from_raw_parts(ptr.as_ptr(), len) };
+    let scope = &mut *self.scope.borrow_mut();
+
+    // `new_from_two_byte` yields `None` when V8 cannot create the string
+    // (presumably when the input exceeds V8's maximum string length), so
+    // report that as a serialization error instead of unwrapping.
+    let v8_value =
+      v8::String::new_from_two_byte(scope, slice, v8::NewStringType::Normal)
+        .ok_or_else(|| {
+          <Error as ser::Error>::custom(
+            "Cannot allocate String: buffer exceeds maximum length.",
+          )
+        })?;
+    Ok(v8_value.into())
+  }
+}
+
// Dispatches between magic and regular struct serializers
pub enum StructSerializers<'a, 'b, 'c> {
Magic(MagicSerializer<'a>),
MagicBuffer(MagicBufferSerializer<'a, 'b, 'c>),
MagicByteString(MagicByteStringSerializer<'a, 'b, 'c>),
+ MagicU16String(MagicU16StringSerializer<'a, 'b, 'c>),
Regular(ObjectSerializer<'a, 'b, 'c>),
}
@@ -360,6 +413,7 @@ impl<'a, 'b, 'c> ser::SerializeStruct for StructSerializers<'a, 'b, 'c> {
StructSerializers::Magic(s) => s.serialize_field(key, value),
StructSerializers::MagicBuffer(s) => s.serialize_field(key, value),
StructSerializers::MagicByteString(s) => s.serialize_field(key, value),
+ StructSerializers::MagicU16String(s) => s.serialize_field(key, value),
StructSerializers::Regular(s) => s.serialize_field(key, value),
}
}
@@ -369,6 +423,7 @@ impl<'a, 'b, 'c> ser::SerializeStruct for StructSerializers<'a, 'b, 'c> {
StructSerializers::Magic(s) => s.end(),
StructSerializers::MagicBuffer(s) => s.end(),
StructSerializers::MagicByteString(s) => s.end(),
+ StructSerializers::MagicU16String(s) => s.end(),
StructSerializers::Regular(s) => s.end(),
}
}
@@ -607,6 +662,10 @@ impl<'a, 'b, 'c> ser::Serializer for Serializer<'a, 'b, 'c> {
let m = MagicByteStringSerializer::new(self.scope);
Ok(StructSerializers::MagicByteString(m))
}
+ magic::u16string::NAME => {
+ let m = MagicU16StringSerializer::new(self.scope);
+ Ok(StructSerializers::MagicU16String(m))
+ }
_ => {
let o = ObjectSerializer::new(self.scope, len);
Ok(StructSerializers::Regular(o))
diff --git a/serde_v8/src/serializable.rs b/serde_v8/src/serializable.rs
index df6912db0..533b3f83f 100644
--- a/serde_v8/src/serializable.rs
+++ b/serde_v8/src/serializable.rs
@@ -4,6 +4,7 @@ use std::mem::transmute_copy;
use crate::Buffer;
use crate::ByteString;
+use crate::U16String;
/// Serializable exists to allow boxing values as "objects" to be serialized later,
/// this is particularly useful for async op-responses. This trait is a more efficient
@@ -63,6 +64,7 @@ pub enum Primitive {
String(String),
Buffer(Buffer),
ByteString(ByteString),
+ U16String(U16String),
}
impl serde::Serialize for Primitive {
@@ -86,6 +88,7 @@ impl serde::Serialize for Primitive {
Self::String(x) => x.serialize(s),
Self::Buffer(x) => x.serialize(s),
Self::ByteString(x) => x.serialize(s),
+ Self::U16String(x) => x.serialize(s),
}
}
}
@@ -130,6 +133,8 @@ impl<T: serde::Serialize + 'static> From<T> for SerializablePkg {
Self::Primitive(Primitive::Buffer(tc(x)))
} else if tid == TypeId::of::<ByteString>() {
Self::Primitive(Primitive::ByteString(tc(x)))
+ } else if tid == TypeId::of::<U16String>() {
+ Self::Primitive(Primitive::U16String(tc(x)))
} else {
Self::Serializable(Box::new(x))
}