summaryrefslogtreecommitdiff
path: root/core/runtime/ops.rs
diff options
context:
space:
mode:
author Matt Mastracci <matthew@mastracci.com> 2023-07-01 16:07:05 -0600
committer GitHub <noreply@github.com> 2023-07-01 22:07:05 +0000
commit 6afdcf59b80b4a3ecf60f220ddff14f4309133d0 (patch)
tree d6a6dad1945430e161fba9e86d93fe3d8893d89c /core/runtime/ops.rs
parent 0f719aa79c2b471815c9d21014b37719c6557c1b (diff)
refactor(ops): op2 supports strings in argument and return position (#19613)
Support strings (&str, String, and Cow) in the argument position and String in the return position. Avoids copies where possible, though this is not always something we can do.
Diffstat (limited to 'core/runtime/ops.rs')
-rw-r--r-- core/runtime/ops.rs 242
1 files changed, 232 insertions, 10 deletions
diff --git a/core/runtime/ops.rs b/core/runtime/ops.rs
index 9e37977c8..5ecab5edf 100644
--- a/core/runtime/ops.rs
+++ b/core/runtime/ops.rs
@@ -7,8 +7,10 @@ use futures::future::Either;
use futures::future::Future;
use futures::future::FutureExt;
use futures::task::noop_waker_ref;
+use std::borrow::Cow;
use std::cell::RefCell;
use std::future::ready;
+use std::mem::MaybeUninit;
use std::option::Option;
use std::task::Context;
use std::task::Poll;
@@ -197,6 +199,104 @@ pub fn to_i64(number: &v8::Value) -> i32 {
0
}
+/// Expands `inbuf` to `outbuf`, assuming that `outbuf` has at least 2x `input_length`.
+///
+/// Each input byte is treated as one Latin-1 code point: bytes below `0x80` are copied
+/// unchanged, and every other byte is written as a two-byte UTF-8 sequence. Returns the
+/// number of bytes written to `outbuf`.
+///
+/// # Safety
+///
+/// `inbuf` must be valid for reads of `input_length` bytes and `outbuf` must be valid for
+/// writes of up to `2 * input_length` bytes. No bounds checks are performed here.
+#[inline(always)]
+unsafe fn latin1_to_utf8(
+ input_length: usize,
+ inbuf: *const u8,
+ outbuf: *mut u8,
+) -> usize {
+ // `output` and `input` are byte offsets into `outbuf` and `inbuf` respectively.
+ let mut output = 0;
+ let mut input = 0;
+ while input < input_length {
+ let char = *(inbuf.add(input));
+ if char < 0x80 {
+ // ASCII: identical in Latin-1 and UTF-8, so copy the byte through.
+ *(outbuf.add(output)) = char;
+ output += 1;
+ } else {
+ // 0x80..=0xFF: emit the two-byte UTF-8 form `110000xx 10xxxxxx`.
+ // Top two bits
+ *(outbuf.add(output)) = (char >> 6) | 0b1100_0000;
+ // Bottom six bits
+ *(outbuf.add(output + 1)) = (char & 0b0011_1111) | 0b1000_0000;
+ output += 2;
+ }
+ input += 1;
+ }
+ output
+}
+
+/// Converts a [`v8::fast_api::FastApiOneByteString`] to either an owned string, or a borrowed string, depending on whether it fits into the
+/// provided buffer.
+///
+/// The result borrows from `buffer` only when the input is shorter than `N / 2` bytes, which
+/// guarantees room for the worst case of two output bytes per input byte. Any longer input is
+/// converted into a freshly allocated `String` via [`to_string_ptr`].
+pub fn to_str_ptr<'a, const N: usize>(
+ string: &mut v8::fast_api::FastApiOneByteString,
+ buffer: &'a mut [MaybeUninit<u8>; N],
+) -> Cow<'a, str> {
+ let input_buf = string.as_bytes();
+ let input_len = input_buf.len();
+ let output_len = buffer.len();
+
+ // We know that this string is full of either one or two-byte UTF-8 chars, so if it's < 1/2 of N we
+ // can skip the ASCII check and just start copying.
+ if input_len < N / 2 {
+ debug_assert!(output_len >= input_len * 2);
+ // Work through a raw `*mut u8` so no `&[u8]` is formed over the uninitialized buffer.
+ let buffer = buffer.as_mut_ptr() as *mut u8;
+
+ let written =
+ // SAFETY: We checked that buffer is at least 2x the size of input_buf
+ unsafe { latin1_to_utf8(input_buf.len(), input_buf.as_ptr(), buffer) };
+
+ debug_assert!(written <= output_len);
+
+ // Only the first `written` bytes were initialized, so the slice covers exactly those.
+ let slice = std::ptr::slice_from_raw_parts(buffer, written);
+ // SAFETY: We know it's valid UTF-8, so make a string
+ Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(&*slice) })
+ } else {
+ // TODO(mmastrac): We could be smarter here about not allocating
+ Cow::Owned(to_string_ptr(string))
+ }
+}
+
+/// Converts a [`v8::fast_api::FastApiOneByteString`] to an owned string. May over-allocate to avoid
+/// re-allocation.
+///
+/// The output buffer is reserved at twice the input length, the worst case in which every
+/// Latin-1 byte expands to two UTF-8 bytes. An empty input yields an empty `String` without
+/// allocating.
+pub fn to_string_ptr(
+  string: &mut v8::fast_api::FastApiOneByteString,
+) -> String {
+  let input_buf = string.as_bytes();
+  let capacity = input_buf.len() * 2;
+
+  // Let `Vec` own the allocation. Calling `std::alloc::alloc` directly is undefined behaviour for
+  // a zero-sized layout (empty input) and may return null on allocation failure; `Vec` handles
+  // both cases for us.
+  let mut out = Vec::<u8>::with_capacity(capacity);
+
+  // SAFETY: `out` has room for at least `capacity` (2x the input size) bytes. We only write
+  // through the raw pointer, so no slice over uninitialized bytes is ever created.
+  // `latin1_to_utf8` initializes exactly the first `written` bytes with valid UTF-8, and
+  // `written <= capacity`, so `set_len` and `from_utf8_unchecked` are sound.
+  unsafe {
+    let written =
+      latin1_to_utf8(input_buf.len(), input_buf.as_ptr(), out.as_mut_ptr());
+
+    debug_assert!(written <= capacity);
+    out.set_len(written);
+    // We know it's valid UTF-8, so make a string
+    String::from_utf8_unchecked(out)
+  }
+}
+
+/// Converts a [`v8::String`] to either an owned string, or a borrowed string, depending on whether it fits into the
+/// provided buffer.
+///
+/// Values that are not strings produce an empty borrowed string rather than an error.
+#[inline(always)]
+pub fn to_str<'a, const N: usize>(
+ scope: &mut v8::Isolate,
+ string: &v8::Value,
+ buffer: &'a mut [MaybeUninit<u8>; N],
+) -> Cow<'a, str> {
+ // Guard for the reinterpreting cast below: only genuine strings may pass.
+ if !string.is_string() {
+ return Cow::Borrowed("");
+ }
+
+ // SAFETY: We checked is_string above
+ let string: &v8::String = unsafe { std::mem::transmute(string) };
+
+ // The borrowed-vs-owned decision is delegated to `to_rust_cow_lossy`.
+ string.to_rust_cow_lossy(scope, buffer)
+}
+
#[cfg(test)]
mod tests {
use crate::error::generic_error;
@@ -206,6 +306,7 @@ mod tests {
use crate::JsRuntime;
use crate::RuntimeOptions;
use deno_ops::op2;
+ use std::borrow::Cow;
use std::cell::Cell;
crate::extension!(
@@ -219,6 +320,13 @@ mod tests {
op_test_result_void_err,
op_test_result_primitive_ok,
op_test_result_primitive_err,
+ op_test_string_owned,
+ op_test_string_ref,
+ op_test_string_cow,
+ op_test_string_roundtrip_char,
+ op_test_string_return,
+ op_test_string_option_return,
+ op_test_string_roundtrip,
op_test_generics<String>,
]
);
@@ -229,18 +337,11 @@ mod tests {
#[op2(core, fast)]
pub fn op_test_fail() {
- FAIL.with(|b| {
- println!("fail");
- b.set(true)
- })
+ FAIL.with(|b| b.set(true))
}
/// Run a test for a single op.
- fn run_test2(
- repeat: usize,
- op: &'static str,
- test: &'static str,
- ) -> Result<(), AnyError> {
+ fn run_test2(repeat: usize, op: &str, test: &str) -> Result<(), AnyError> {
let mut runtime = JsRuntime::new(RuntimeOptions {
extensions: vec![testing::init_ops_and_esm()],
..Default::default()
@@ -278,7 +379,7 @@ mod tests {
),
)?;
if FAIL.with(|b| b.get()) {
- Err(generic_error("test failed"))
+ Err(generic_error(format!("{op} test failed ({test})")))
} else {
Ok(())
}
@@ -406,6 +507,127 @@ mod tests {
Ok(())
}
+ // Test op: receives its string argument as an owned `String` and returns its length in
+ // UTF-8 bytes.
+ #[op2(core, fast)]
+ pub fn op_test_string_owned(#[string] s: String) -> u32 {
+ s.len() as _
+ }
+
+ // Test op: receives its string argument as a borrowed `&str` and returns its length in
+ // UTF-8 bytes.
+ #[op2(core, fast)]
+ pub fn op_test_string_ref(#[string] s: &str) -> u32 {
+ s.len() as _
+ }
+
+ // Test op: receives its string argument as a `Cow<str>` and returns its length in
+ // UTF-8 bytes.
+ #[op2(core, fast)]
+ pub fn op_test_string_cow(#[string] s: Cow<str>) -> u32 {
+ s.len() as _
+ }
+
+ // Test op: returns the code point of the first character of `s`, so the JS side can check
+ // that the string was decoded correctly. Panics (via `unwrap`) if `s` is empty.
+ #[op2(core, fast)]
+ pub fn op_test_string_roundtrip_char(#[string] s: Cow<str>) -> u32 {
+ s.chars().next().unwrap() as u32
+ }
+
+ // Checks that every string-argument flavour (`String`, `Cow<str>`, `&str`) reports the
+ // expected UTF-8 byte length for a range of inputs, then checks that individual
+ // characters decode to the right code point.
+ #[tokio::test]
+ pub async fn test_op_strings() -> Result<(), Box<dyn std::error::Error>> {
+ for op in [
+ "op_test_string_owned",
+ "op_test_string_cow",
+ "op_test_string_ref",
+ ] {
+ // Each pair is (expected UTF-8 byte length, JS expression producing the string).
+ for (len, str) in [
+ // ASCII
+ (3, "'abc'"),
+ // Latin-1 (a single Latin-1 byte, but two bytes once encoded as UTF-8)
+ (2, "'\\u00a0'"),
+ // ASCII, long input
+ (10000, "'a'.repeat(10000)"),
+ // Latin-1, long input (two UTF-8 bytes per character)
+ (20000, "'\\u00a0'.repeat(10000)"),
+ // 4-byte UTF-8 emoji (1F995 = 🦕)
+ (40000, "'\\u{1F995}'.repeat(10000)"),
+ ] {
+ let test = format!("assert({op}({str}) == {len})");
+ // NOTE(review): the repeat count of 10000 is presumably high enough for V8 to start
+ // using the fast-call path -- confirm against `run_test2`.
+ run_test2(10000, op, &test)?;
+ }
+ }
+
+ // Ensure that we're correctly encoding UTF-8
+ run_test2(
+ 10000,
+ "op_test_string_roundtrip_char",
+ "assert(op_test_string_roundtrip_char('\\u00a0') == 0xa0)",
+ )?;
+ // Highest Latin-1 code point.
+ run_test2(
+ 10000,
+ "op_test_string_roundtrip_char",
+ "assert(op_test_string_roundtrip_char('\\u00ff') == 0xff)",
+ )?;
+ // Lowest code point that needs two UTF-8 bytes.
+ run_test2(
+ 10000,
+ "op_test_string_roundtrip_char",
+ "assert(op_test_string_roundtrip_char('\\u0080') == 0x80)",
+ )?;
+ // First code point that is not representable in Latin-1.
+ run_test2(
+ 10000,
+ "op_test_string_roundtrip_char",
+ "assert(op_test_string_roundtrip_char('\\u0100') == 0x100)",
+ )?;
+ Ok(())
+ }
+
+ // Test op: concatenates its two string arguments and returns the result as a `String`.
+ #[op2(core)]
+ #[string]
+ pub fn op_test_string_return(
+ #[string] a: Cow<str>,
+ #[string] b: Cow<str>,
+ ) -> String {
+ (a + b).to_string()
+ }
+
+ // Test op: returns `None` when `a` is the literal "none", otherwise the concatenation of
+ // `a` and `b`. Exercises `Option<String>` in the return position.
+ #[op2(core)]
+ #[string]
+ pub fn op_test_string_option_return(
+ #[string] a: Cow<str>,
+ #[string] b: Cow<str>,
+ ) -> Option<String> {
+ if a == "none" {
+ return None;
+ }
+ Some((a + b).to_string())
+ }
+
+ // Test op: returns its string argument unchanged, so a test can compare input and output.
+ #[op2(core)]
+ #[string]
+ pub fn op_test_string_roundtrip(#[string] s: String) -> String {
+ s
+ }
+
+ // Checks string return values: a plain `String`, both outcomes of `Option<String>`, and
+ // a round trip of characters in the 0x80..=0xFF range.
+ #[tokio::test]
+ pub async fn test_op_string_returns() -> Result<(), Box<dyn std::error::Error>>
+ {
+ run_test2(
+ 1,
+ "op_test_string_return",
+ "assert(op_test_string_return('a', 'b') == 'ab')",
+ )?;
+ // `Some(..)` case.
+ run_test2(
+ 1,
+ "op_test_string_option_return",
+ "assert(op_test_string_option_return('a', 'b') == 'ab')",
+ )?;
+ // `None` case: the JS side is expected to see `null`.
+ run_test2(
+ 1,
+ "op_test_string_option_return",
+ "assert(op_test_string_option_return('none', 'b') == null)",
+ )?;
+ // Characters that need two UTF-8 bytes must come back unchanged.
+ run_test2(
+ 1,
+ "op_test_string_roundtrip",
+ "assert(op_test_string_roundtrip('\\u0080\\u00a0\\u00ff') == '\\u0080\\u00a0\\u00ff')",
+ )?;
+ Ok(())
+ }
+
// We don't actually test this one -- we just want it to compile. It only checks that
// `#[op2]` accepts an op with a generic type parameter.
#[op2(core, fast)]
pub fn op_test_generics<T: Clone>() {}