diff options
Diffstat (limited to 'ext/ffi')
-rw-r--r-- | ext/ffi/README.md | 22 | ||||
-rw-r--r-- | ext/ffi/build.rs | 63 | ||||
-rw-r--r-- | ext/ffi/jit_trampoline.rs | 153 | ||||
-rw-r--r-- | ext/ffi/lib.rs | 39 | ||||
-rw-r--r-- | ext/ffi/tcc.rs | 143 | ||||
m--------- | ext/ffi/tinycc | 0 |
6 files changed, 413 insertions, 7 deletions
diff --git a/ext/ffi/README.md b/ext/ffi/README.md index cc2d81cd2..5f7f1cb9f 100644 --- a/ext/ffi/README.md +++ b/ext/ffi/README.md @@ -1,3 +1,25 @@ # deno_ffi This crate implements dynamic library ffi. + +## Performance + +Deno FFI calls have extremely low overhead (~1ns on M1 16GB RAM) and perform on +par with native code. Deno leverages V8 Fast API calls and JIT compiled bindings +to achieve these high speeds. + +`Deno.dlopen` generates an optimized and a fallback path. Optimized paths are +triggered when V8 decides to optimize the function and then calls it through the Fast +API. Fallback paths handle types like function callbacks and implement proper +error handling for unexpected types, which are not supported in Fast calls. + +Optimized calls enter a JIT compiled function "trampoline" that translates Fast +API values directly for symbol calls. JIT compilation itself is super fast, +thanks to `tinycc`. Currently, the optimized path is only supported on Linux and +macOS. + +To run benchmarks: + +```bash +target/release/deno bench --allow-ffi --allow-read --unstable ./test_ffi/tests/bench.js +``` diff --git a/ext/ffi/build.rs b/ext/ffi/build.rs new file mode 100644 index 000000000..fd6aea608 --- /dev/null +++ b/ext/ffi/build.rs @@ -0,0 +1,63 @@ +// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license. + +use std::env; + +fn build_tcc() { + { + // TODO(@littledivy): Windows support for fast call. 
+ // let tcc_path = root + // .parent() + // .unwrap() + // .to_path_buf() + // .parent() + // .unwrap() + // .to_path_buf() + // .join("third_party") + // .join("prebuilt") + // .join("win"); + // println!("cargo:rustc-link-search=native={}", tcc_path.display()); + } + #[cfg(not(target_os = "windows"))] + { + use std::path::PathBuf; + use std::process::exit; + use std::process::Command; + + let root = PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"))); + let tcc_src = root.join("tinycc"); + dbg!(&tcc_src); + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + let mut configure = Command::new(tcc_src.join("configure")); + configure.current_dir(&out_dir); + configure.args(&["--enable-static", "--extra-cflags=-fPIC -O3 -g -static"]); + let status = configure.status().unwrap(); + if !status.success() { + eprintln!("Fail to configure: {:?}", status); + exit(1); + } + + let mut make = Command::new("make"); + make.current_dir(&out_dir).arg(format!( + "-j{}", + env::var("NUM_JOBS").unwrap_or_else(|_| String::from("1")) + )); + make.args(&["libtcc.a"]); + let status = make.status().unwrap(); + + if !status.success() { + eprintln!("Fail to make: {:?}", status); + exit(1); + } + println!("cargo:rustc-link-search=native={}", out_dir.display()); + println!("cargo:rerun-if-changed={}", tcc_src.display()); + } +} + +#[cfg(target_os = "windows")] +fn main() {} + +#[cfg(not(target_os = "windows"))] +fn main() { + build_tcc(); + println!("cargo:rustc-link-lib=static=tcc"); +} diff --git a/ext/ffi/jit_trampoline.rs b/ext/ffi/jit_trampoline.rs new file mode 100644 index 000000000..40c14dfb0 --- /dev/null +++ b/ext/ffi/jit_trampoline.rs @@ -0,0 +1,153 @@ +// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license. 
+ +use crate::NativeType; +use crate::{tcc::Compiler, Symbol}; +use std::ffi::c_void; +use std::ffi::CString; +use std::fmt::Write as _; + +pub(crate) struct Allocation { + pub addr: *mut c_void, + _ctx: Compiler, + _sym: Box<Symbol>, +} + +macro_rules! cstr { + ($st:expr) => { + &CString::new($st).unwrap() + }; +} + +fn native_arg_to_c(ty: &NativeType) -> &'static str { + match ty { + NativeType::U8 | NativeType::U16 | NativeType::U32 => "uint32_t", + NativeType::I8 | NativeType::I16 | NativeType::I32 => "int32_t", + NativeType::Void => "void", + NativeType::F32 => "float", + NativeType::F64 => "double", + _ => unimplemented!(), + } +} + +fn native_to_c(ty: &NativeType) -> &'static str { + match ty { + NativeType::U8 => "uint8_t", + NativeType::U16 => "uint16_t", + NativeType::U32 => "uint32_t", + NativeType::I8 => "int8_t", + NativeType::I16 => "int16_t", + NativeType::I32 => "int32_t", + NativeType::Void => "void", + NativeType::F32 => "float", + NativeType::F64 => "double", + _ => unimplemented!(), + } +} + +pub(crate) fn codegen(sym: &crate::Symbol) -> String { + let mut c = String::from("#include <stdint.h>\n"); + let ret = native_to_c(&sym.result_type); + + // extern <return_type> func( + c += "\nextern "; + c += ret; + c += " func("; + // <param_type> p0, <param_type> p1, ...); + for (i, ty) in sym.parameter_types.iter().enumerate() { + if i > 0 { + c += ", "; + } + c += native_to_c(ty); + let _ = write!(c, " p{i}"); + } + c += ");\n\n"; + + // void* recv, <param_type> p0, <param_type> p1, ...); + c += ret; + c += " func_trampoline("; + c += "void* recv"; + for (i, ty) in sym.parameter_types.iter().enumerate() { + c += ", "; + c += native_arg_to_c(ty); + let _ = write!(c, " p{i}"); + } + c += ") {\n"; + // return func(p0, p1, ...); + c += " return func("; + for (i, _) in sym.parameter_types.iter().enumerate() { + if i > 0 { + c += ", "; + } + let _ = write!(c, "p{i}"); + } + c += ");\n}\n\n"; + c +} + +pub(crate) fn gen_trampoline( + sym: 
Box<crate::Symbol>, +) -> Result<Box<Allocation>, ()> { + let mut ctx = Compiler::new()?; + ctx.set_options(cstr!("-nostdlib")); + // SAFETY: symbol satisfies ABI requirement. + unsafe { ctx.add_symbol(cstr!("func"), sym.ptr.0 as *const c_void) }; + let c = codegen(&sym); + + ctx.compile_string(cstr!(c))?; + let alloc = Allocation { + addr: ctx.relocate_and_get_symbol(cstr!("func_trampoline"))?, + _ctx: ctx, + _sym: sym, + }; + Ok(Box::new(alloc)) +} + +#[cfg(test)] +mod tests { + use super::*; + use libffi::middle::Type; + use std::ptr::null_mut; + + fn codegen(parameters: Vec<NativeType>, ret: NativeType) -> String { + let sym = Box::new(crate::Symbol { + cif: libffi::middle::Cif::new(vec![], Type::void()), + ptr: libffi::middle::CodePtr(null_mut()), + parameter_types: parameters, + result_type: ret, + can_callback: false, + }); + super::codegen(&sym) + } + + #[test] + fn test_gen_trampoline() { + assert_eq!( + codegen(vec![], NativeType::Void), + "#include <stdint.h>\n\nextern void func();\n\nvoid func_trampoline(void* recv) {\n return func();\n}\n\n" + ); + assert_eq!( + codegen(vec![NativeType::U32, NativeType::U32], NativeType::U32), + "#include <stdint.h>\n\nextern uint32_t func(uint32_t p0, uint32_t p1);\n\nuint32_t func_trampoline(void* recv, uint32_t p0, uint32_t p1) {\n return func(p0, p1);\n}\n\n" + ); + assert_eq!( + codegen(vec![NativeType::I32, NativeType::I32], NativeType::I32), + "#include <stdint.h>\n\nextern int32_t func(int32_t p0, int32_t p1);\n\nint32_t func_trampoline(void* recv, int32_t p0, int32_t p1) {\n return func(p0, p1);\n}\n\n" + ); + assert_eq!( + codegen(vec![NativeType::F32, NativeType::F32], NativeType::F32), + "#include <stdint.h>\n\nextern float func(float p0, float p1);\n\nfloat func_trampoline(void* recv, float p0, float p1) {\n return func(p0, p1);\n}\n\n" + ); + assert_eq!( + codegen(vec![NativeType::F64, NativeType::F64], NativeType::F64), + "#include <stdint.h>\n\nextern double func(double p0, double p1);\n\ndouble 
func_trampoline(void* recv, double p0, double p1) {\n return func(p0, p1);\n}\n\n" + ); + } + + #[test] + fn test_gen_trampoline_implicit_cast() { + assert_eq!( + codegen(vec![NativeType::I8, NativeType::U8], NativeType::I8), + "#include <stdint.h>\n\nextern int8_t func(int8_t p0, uint8_t p1);\n\nint8_t func_trampoline(void* recv, int32_t p0, uint32_t p1) {\n return func(p0, p1);\n}\n\n" + ) + } +} diff --git a/ext/ffi/lib.rs b/ext/ffi/lib.rs index a5a156727..feb879aba 100644 --- a/ext/ffi/lib.rs +++ b/ext/ffi/lib.rs @@ -39,6 +39,11 @@ use std::path::PathBuf; use std::ptr; use std::rc::Rc; +#[cfg(not(target_os = "windows"))] +mod jit_trampoline; +#[cfg(not(target_os = "windows"))] +mod tcc; + thread_local! { static LOCAL_ISOLATE_POINTER: RefCell<*const v8::Isolate> = RefCell::new(ptr::null()); } @@ -72,6 +77,8 @@ struct Symbol { ptr: libffi::middle::CodePtr, parameter_types: Vec<NativeType>, result_type: NativeType, + // This is dead code only on Windows + #[allow(dead_code)] can_callback: bool, } @@ -678,6 +685,7 @@ impl From<&NativeType> for fast_api::Type { } } +#[cfg(not(target_os = "windows"))] fn is_fast_api(rv: NativeType) -> bool { !matches!( rv, @@ -696,25 +704,36 @@ fn make_sync_fn<'s>( scope: &mut v8::HandleScope<'s>, sym: Box<Symbol>, ) -> v8::Local<'s, v8::Function> { - let mut fast_ffi_templ = None; + #[cfg(not(target_os = "windows"))] + let mut fast_ffi_templ: Option<FfiFastCallTemplate> = None; + + #[cfg(target_os = "windows")] + let fast_ffi_templ: Option<FfiFastCallTemplate> = None; + #[cfg(not(target_os = "windows"))] + let mut fast_allocations: Option<*mut ()> = None; + #[cfg(not(target_os = "windows"))] if !sym.can_callback && !sym.parameter_types.iter().any(|t| !is_fast_api(*t)) && is_fast_api(sym.result_type) { + let ret = fast_api::Type::from(&sym.result_type); + let mut args = sym .parameter_types .iter() .map(|t| t.into()) .collect::<Vec<_>>(); - if args.is_empty() { - args.push(fast_api::Type::V8Value); - } + // recv + args.insert(0, 
fast_api::Type::V8Value); + let symbol_trampoline = + jit_trampoline::gen_trampoline(sym.clone()).expect("gen_trampoline"); fast_ffi_templ = Some(FfiFastCallTemplate { args: args.into_boxed_slice(), - ret: (&fast_api::Type::from(&sym.result_type)).into(), - symbol_ptr: sym.ptr.as_ptr() as *const c_void, + ret: (&ret).into(), + symbol_ptr: symbol_trampoline.addr, }); + fast_allocations = Some(Box::into_raw(symbol_trampoline) as *mut ()); } let sym = Box::leak(sym); @@ -754,7 +773,13 @@ fn make_sync_fn<'s>( Box::new(move |_| { // SAFETY: This is never called twice. pointer obtained // from Box::into_raw, hence, satisfies memory layout requirements. - unsafe { Box::from_raw(sym) }; + unsafe { + Box::from_raw(sym); + #[cfg(not(target_os = "windows"))] + if let Some(fast_allocations) = fast_allocations { + Box::from_raw(fast_allocations as *mut jit_trampoline::Allocation); + } + } }), ); diff --git a/ext/ffi/tcc.rs b/ext/ffi/tcc.rs new file mode 100644 index 000000000..edc30c893 --- /dev/null +++ b/ext/ffi/tcc.rs @@ -0,0 +1,143 @@ +// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license. + +use std::{ + ffi::CStr, + marker::PhantomData, + os::raw::{c_char, c_int, c_void}, + ptr::null_mut, +}; + +#[repr(C)] +#[derive(Debug)] +pub struct TCCState { + _unused: [u8; 0], +} +pub const TCC_OUTPUT_MEMORY: i32 = 1; + +extern "C" { + pub fn tcc_new() -> *mut TCCState; + pub fn tcc_delete(s: *mut TCCState); + pub fn tcc_set_options(s: *mut TCCState, str: *const c_char); + pub fn tcc_compile_string(s: *mut TCCState, buf: *const c_char) -> c_int; + pub fn tcc_add_symbol( + s: *mut TCCState, + name: *const c_char, + val: *const c_void, + ) -> c_int; + pub fn tcc_set_output_type(s: *mut TCCState, output_type: c_int) -> c_int; + pub fn tcc_relocate(s1: *mut TCCState, ptr: *mut c_void) -> c_int; + pub fn tcc_get_symbol(s: *mut TCCState, name: *const c_char) -> *mut c_void; +} + +/// Compilation context. 
+pub struct Compiler { + inner: *mut TCCState, + _phantom: PhantomData<TCCState>, + pub bin: Option<Vec<u8>>, +} + +impl Compiler { + pub fn new() -> Result<Self, ()> { + // SAFETY: There is one context per thread. + let inner = unsafe { tcc_new() }; + if inner.is_null() { + Err(()) + } else { + let ret = + // SAFETY: set output to memory. + unsafe { tcc_set_output_type(inner, TCC_OUTPUT_MEMORY as c_int) }; + assert_eq!(ret, 0); + Ok(Self { + inner, + _phantom: PhantomData, + bin: None, + }) + } + } + + pub fn set_options(&mut self, option: &CStr) -> &mut Self { + // SAFETY: option is a null-terminated C string. + unsafe { + tcc_set_options(self.inner, option.as_ptr()); + } + self + } + + pub fn compile_string(&mut self, p: &CStr) -> Result<(), ()> { + // SAFETY: p is a null-terminated C string. + let ret = unsafe { tcc_compile_string(self.inner, p.as_ptr()) }; + if ret == 0 { + Ok(()) + } else { + Err(()) + } + } + + /// # Safety + /// The symbol must satisfy the ABI requirement. + pub unsafe fn add_symbol(&mut self, sym: &CStr, val: *const c_void) { + // SAFETY: sym is a null-terminated C string. + let ret = tcc_add_symbol(self.inner, sym.as_ptr(), val); + assert_eq!(ret, 0); + } + + pub fn relocate_and_get_symbol( + &mut self, + sym: &CStr, + ) -> Result<*mut c_void, ()> { + // SAFETY: pass null ptr to get required length + let len = unsafe { tcc_relocate(self.inner, null_mut()) }; + if len < 0 { + return Err(()); + }; + let mut bin = Vec::with_capacity(len as usize); + let ret = + // SAFETY: bin is allocated up to len. + unsafe { tcc_relocate(self.inner, bin.as_mut_ptr() as *mut c_void) }; + if ret != 0 { + return Err(()); + } + // SAFETY: if ret == 0, bin is initialized. + unsafe { + bin.set_len(len as usize); + } + self.bin = Some(bin); + // SAFETY: sym is a null-terminated C string; NULL is returned when the symbol is missing. 
+ let addr = unsafe { tcc_get_symbol(self.inner, sym.as_ptr()) }; + if addr.is_null() { Err(()) } else { Ok(addr) } + } +} + +impl Drop for Compiler { + fn drop(&mut self) { + // SAFETY: delete state from tcc_new() + unsafe { tcc_delete(self.inner) }; + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::ffi::CString; + + #[test] + fn test_compiler_jit() { + let p = CString::new( + r#" + #include <stdint.h> + int32_t add(int32_t a, int32_t b) { + return a + b; + } + "# + .as_bytes(), + ) + .unwrap(); + let sym = CString::new("add".as_bytes()).unwrap(); + + let mut ctx = Compiler::new().unwrap(); + let ops = CString::new("-nostdlib").unwrap(); + ctx.set_options(&ops); + assert!(ctx.compile_string(&p).is_ok()); + ctx.relocate_and_get_symbol(&sym).unwrap(); + } +} diff --git a/ext/ffi/tinycc b/ext/ffi/tinycc new file mode 160000 +Subproject afc136262e93ae85fb3643005b36dbfc30d99c4 |