From 05f6e773fa61edd06d88cdea3bf75df38570b156 Mon Sep 17 00:00:00 2001
From: Aaron O'Mullan
Date: Sun, 15 May 2022 17:16:09 +0200
Subject: perf(serde_v8): fast path for large strings (#14450)

---
 serde_v8/de.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

(limited to 'serde_v8')

diff --git a/serde_v8/de.rs b/serde_v8/de.rs
index e835ab493..7b825f990 100644
--- a/serde_v8/de.rs
+++ b/serde_v8/de.rs
@@ -211,7 +211,7 @@ impl<'de, 'a, 'b, 's, 'x> de::Deserializer<'de>
   {
     if self.input.is_string() {
       let v8_string = v8::Local::<v8::String>::try_from(self.input).unwrap();
-      let string = v8_string.to_rust_string_lossy(self.scope);
+      let string = to_utf8(v8_string, self.scope);
       visitor.visit_string(string)
     } else {
       Err(Error::ExpectedString)
@@ -661,3 +661,66 @@ fn bigint_to_f64(b: v8::Local<v8::BigInt>) -> f64 {
     .sum();
   sign * x
 }
+
+pub fn to_utf8(
+  s: v8::Local<v8::String>,
+  scope: &mut v8::HandleScope,
+) -> String {
+  to_utf8_fast(s, scope).unwrap_or_else(|| to_utf8_slow(s, scope))
+}
+
+fn to_utf8_fast(
+  s: v8::Local<v8::String>,
+  scope: &mut v8::HandleScope,
+) -> Option<String> {
+  // Over-allocate by 20% to avoid checking string twice
+  let len = s.length();
+  let capacity = (len as f64 * 1.2) as usize;
+  let mut buf = Vec::with_capacity(capacity);
+  let mut nchars = 0;
+  let data = buf.as_mut_ptr();
+  let length = s.write_utf8(
+    scope,
+    // SAFETY: we're essentially providing the raw internal slice/buffer owned by the Vec
+    // which fulfills all of from_raw_parts_mut's safety requirements besides "initialization"
+    // and since we're operating on a [u8] not [T] we can safely assume the slice's values
+    // are sufficiently "initialized" for writes
+    unsafe { std::slice::from_raw_parts_mut(data, capacity) },
+    Some(&mut nchars),
+    v8::WriteOptions::NO_NULL_TERMINATION
+      | v8::WriteOptions::REPLACE_INVALID_UTF8,
+  );
+  if nchars < len {
+    return None;
+  }
+  // SAFETY: write_utf8 guarantees `length` bytes are initialized & valid utf8
+  unsafe {
+    buf.set_len(length);
+    Some(String::from_utf8_unchecked(buf))
+  }
+}
+
+fn to_utf8_slow(
+  s: v8::Local<v8::String>,
+  scope: &mut v8::HandleScope,
+) -> String {
+  let capacity = s.utf8_length(scope);
+  let mut buf = Vec::with_capacity(capacity);
+  let data = buf.as_mut_ptr();
+  let length = s.write_utf8(
+    scope,
+    // SAFETY: we're essentially providing the raw internal slice/buffer owned by the Vec
+    // which fulfills all of from_raw_parts_mut's safety requirements besides "initialization"
+    // and since we're operating on a [u8] not [T] we can safely assume the slice's values
+    // are sufficiently "initialized" for writes
+    unsafe { std::slice::from_raw_parts_mut(data, capacity) },
+    None,
+    v8::WriteOptions::NO_NULL_TERMINATION
+      | v8::WriteOptions::REPLACE_INVALID_UTF8,
+  );
+  // SAFETY: write_utf8 guarantees `length` bytes are initialized & valid utf8
+  unsafe {
+    buf.set_len(length);
+    String::from_utf8_unchecked(buf)
+  }
+}
-- 
cgit v1.2.3