diff options
author | Matt Mastracci <matthew@mastracci.com> | 2023-04-04 06:46:31 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-04 06:46:31 -0600 |
commit | a1764f7690cfdc3e42724fcad29ef954b7e576a4 (patch) | |
tree | 1b621ebd7a6ef50687eeb2061740895096136e8a /core/fast_string.rs | |
parent | 2dc20168371e827b86e2ce0d1d7787139fba68f3 (diff) |
refactor(core): Improve ergonomics of managing ASCII strings (#18498)
This is a follow-on to the earlier work in reducing string copies,
mainly focused on ensuring that ASCII strings are easy to provide to the
JS runtime.
While we are replacing a 16-byte reference in a number of places with a
24-byte structure (measured via `std::mem::size_of`), the reduction in
copies wins out over the additional size of the arguments passed into
functions.
Benchmarking shows approximately the same if not slightly less wallclock
time/instructions retired, but I believe this continues to open up
further refactoring opportunities.
Diffstat (limited to 'core/fast_string.rs')
-rw-r--r-- | core/fast_string.rs | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/core/fast_string.rs b/core/fast_string.rs new file mode 100644 index 000000000..95dfb4939 --- /dev/null +++ b/core/fast_string.rs @@ -0,0 +1,243 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. + +use std::borrow::Borrow; +use std::fmt::Debug; +use std::hash::Hash; +use std::sync::Arc; +use url::Url; +use v8::NewStringType; + +/// Module names and code can be sourced from strings or bytes that are either owned or borrowed. This enumeration allows us +/// to perform a minimal amount of cloning and format-shifting of the underlying data. +/// +/// Note that any [`FastString`] created from a `'static` byte array or string must contain ASCII characters. +/// +/// Examples of ways to construct a [`FastString`]: +/// +/// ```rust +/// # use deno_core::{ascii_str, FastString}; +/// +/// let code: FastString = ascii_str!("a string"); +/// let code: FastString = format!("a string").into(); +/// ``` +pub enum FastString { + /// Created from static data. + Static(&'static str), + + /// Created from static data, known to contain only ASCII chars. + StaticAscii(&'static str), + + /// An owned chunk of data. Note that we use `Box` rather than `Vec` to avoid the + /// storage overhead. + Owned(Box<str>), + + // Scripts loaded from the `deno_graph` infrastructure. + Arc(Arc<str>), +} + +impl FastString { + /// Compile-time function to determine if a string is ASCII. Note that UTF-8 chars + /// longer than one byte have the high-bit set and thus, are not ASCII. + const fn is_ascii(s: &'static [u8]) -> bool { + let mut i = 0; + while i < s.len() { + if !s[i].is_ascii() { + return false; + } + i += 1; + } + true + } + + /// Create a [`FastString`] from a static string. The string may contain non-ASCII characters, and if + /// so, will take the slower path when used in v8. + pub const fn from_static(s: &'static str) -> Self { + if Self::is_ascii(s.as_bytes()) { + Self::StaticAscii(s) + } else { + Self::Static(s) + } + } + + /// Create a [`FastString`] from a static string. If the string contains non-ASCII characters, the compiler + /// will abort. + pub const fn ensure_static_ascii(s: &'static str) -> Self { + if Self::is_ascii(s.as_bytes()) { + Self::StaticAscii(s) + } else { + panic!("This string contained non-ASCII characters and cannot be created with ensure_static_ascii") + } + } + + /// Creates a cheap copy of this [`FastString`], potentially transmuting it to a faster form. Note that this + /// is not a clone operation as it consumes the old [`FastString`]. + pub fn into_cheap_copy(self) -> (Self, Self) { + match self { + Self::Static(s) => (Self::Static(s), Self::Static(s)), + Self::StaticAscii(s) => (Self::StaticAscii(s), Self::StaticAscii(s)), + Self::Arc(s) => (Self::Arc(s.clone()), Self::Arc(s)), + Self::Owned(s) => { + let s: Arc<str> = s.into(); + (Self::Arc(s.clone()), Self::Arc(s)) + } + } + } + + pub const fn try_static_ascii(&self) -> Option<&'static [u8]> { + match self { + Self::StaticAscii(s) => Some(s.as_bytes()), + _ => None, + } + } + + pub fn as_bytes(&self) -> &[u8] { + // TODO(mmastrac): This can be const eventually (waiting for Arc const deref) + match self { + Self::Arc(s) => s.as_bytes(), + Self::Owned(s) => s.as_bytes(), + Self::Static(s) => s.as_bytes(), + Self::StaticAscii(s) => s.as_bytes(), + } + } + + pub fn as_str(&self) -> &str { + // TODO(mmastrac): This can be const eventually (waiting for Arc const deref) + match self { + Self::Arc(s) => s, + Self::Owned(s) => s, + Self::Static(s) => s, + Self::StaticAscii(s) => s, + } + } + + /// Create a v8 string from this [`FastString`]. If the string is static and contains only ASCII characters, + /// an external one-byte static is created. + pub fn v8<'a>( + &self, + scope: &mut v8::HandleScope<'a>, + ) -> v8::Local<'a, v8::String> { + match self.try_static_ascii() { + Some(s) => v8::String::new_external_onebyte_static(scope, s).unwrap(), + None => { + v8::String::new_from_utf8(scope, self.as_bytes(), NewStringType::Normal) + .unwrap() + } + } + } + + /// Truncates a [`FastString`] value, possibly re-allocating or memcpy'ing. May be slow. + pub fn truncate(&mut self, index: usize) { + match self { + Self::Static(b) => *self = Self::Static(&b[..index]), + Self::StaticAscii(b) => *self = Self::StaticAscii(&b[..index]), + Self::Owned(b) => *self = Self::Owned(b[..index].to_owned().into()), + // We can't do much if we have an Arc<str>, so we'll just take ownership of the truncated version + Self::Arc(s) => *self = s[..index].to_owned().into(), + } + } +} + +impl Hash for FastString { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.as_str().hash(state) + } +} + +impl AsRef<str> for FastString { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl Borrow<str> for FastString { + fn borrow(&self) -> &str { + self.as_str() + } +} + +impl Debug for FastString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Debug::fmt(self.as_str(), f) + } +} + +impl Default for FastString { + fn default() -> Self { + Self::StaticAscii("") + } +} + +impl PartialEq for FastString { + fn eq(&self, other: &Self) -> bool { + self.as_bytes() == other.as_bytes() + } +} + +impl Eq for FastString {} + +/// [`FastString`] can be made cheaply from [`Url`] as we know it's owned and don't need to do an +/// ASCII check. +impl From<Url> for FastString { + fn from(value: Url) -> Self { + let s: String = value.into(); + s.into() + } +} + +/// [`FastString`] can be made cheaply from [`String`] as we know it's owned and don't need to do an +/// ASCII check. +impl From<String> for FastString { + fn from(value: String) -> Self { + FastString::Owned(value.into_boxed_str()) + } +} + +/// [`FastString`] can be made cheaply from [`Arc<str>`] as we know it's shared and don't need to do an +/// ASCII check. +impl From<Arc<str>> for FastString { + fn from(value: Arc<str>) -> Self { + FastString::Arc(value) + } +} + +/// Include a fast string in the binary. This string is asserted at compile-time to be 7-bit ASCII for optimal +/// v8 performance. +#[macro_export] +macro_rules! include_ascii_string { + ($file:literal) => { + $crate::FastString::ensure_static_ascii(include_str!($file)) + }; +} + +/// Include a fast string in the binary from a string literal. This string is asserted at compile-time to be +/// 7-bit ASCII for optimal v8 performance. +#[macro_export] +macro_rules! ascii_str { + ($str:literal) => { + $crate::FastString::ensure_static_ascii($str) + }; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn truncate() { + let mut s = "123456".to_owned(); + s.truncate(3); + + let mut code: FastString = FastString::from_static("123456"); + code.truncate(3); + assert_eq!(s, code.as_ref()); + + let mut code: FastString = "123456".to_owned().into(); + code.truncate(3); + assert_eq!(s, code.as_ref()); + + let arc_str: Arc<str> = "123456".into(); + let mut code: FastString = arc_str.into(); + code.truncate(3); + assert_eq!(s, code.as_ref()); + } +} |