diff options
Diffstat (limited to 'ext/kv/codec.rs')
-rw-r--r-- | ext/kv/codec.rs | 559 |
1 files changed, 559 insertions, 0 deletions
diff --git a/ext/kv/codec.rs b/ext/kv/codec.rs new file mode 100644 index 000000000..b2acfdbc2 --- /dev/null +++ b/ext/kv/codec.rs @@ -0,0 +1,559 @@ +// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. +// Ported from https://github.com/foundationdb-rs/foundationdb-rs/blob/main/foundationdb/src/tuple/pack.rs + +use crate::Key; +use crate::KeyPart; + +//const NIL: u8 = 0x00; +const BYTES: u8 = 0x01; +const STRING: u8 = 0x02; +//const NESTED: u8 = 0x05; +const NEGINTSTART: u8 = 0x0b; +const INTZERO: u8 = 0x14; +const POSINTEND: u8 = 0x1d; +//const FLOAT: u8 = 0x20; +const DOUBLE: u8 = 0x21; +const FALSE: u8 = 0x26; +const TRUE: u8 = 0x27; + +const ESCAPE: u8 = 0xff; + +const CANONICAL_NAN_POS: u64 = 0x7ff8000000000000u64; +const CANONICAL_NAN_NEG: u64 = 0xfff8000000000000u64; + +pub fn canonicalize_f64(n: f64) -> f64 { + if n.is_nan() { + if n.is_sign_negative() { + f64::from_bits(CANONICAL_NAN_NEG) + } else { + f64::from_bits(CANONICAL_NAN_POS) + } + } else { + n + } +} + +pub fn encode_key(key: &Key) -> std::io::Result<Vec<u8>> { + // Disallow empty key + if key.0.is_empty() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "key should not be empty", + )); + } + + let mut output: Vec<u8> = vec![]; + for part in &key.0 { + match part { + KeyPart::String(key) => { + output.push(STRING); + escape_raw_bytes_into(&mut output, key.as_bytes()); + output.push(0); + } + KeyPart::Int(key) => { + bigint::encode_into(&mut output, key)?; + } + KeyPart::Float(key) => { + double::encode_into(&mut output, *key); + } + KeyPart::Bytes(key) => { + output.push(BYTES); + escape_raw_bytes_into(&mut output, key); + output.push(0); + } + KeyPart::False => { + output.push(FALSE); + } + KeyPart::True => { + output.push(TRUE); + } + } + } + Ok(output) +} + +pub fn decode_key(mut bytes: &[u8]) -> std::io::Result<Key> { + // Disallow empty key + if bytes.is_empty() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "key should not be empty", + )); + } + + let mut key = Key(vec![]); + while !bytes.is_empty() { + let tag = bytes[0]; + bytes = &bytes[1..]; + + let next_bytes = match tag { + self::STRING => { + let (next_bytes, data) = parse_slice(bytes)?; + let data = String::from_utf8(data).map_err(|_| { + std::io::Error::new(std::io::ErrorKind::InvalidData, "invalid utf8") + })?; + key.0.push(KeyPart::String(data)); + next_bytes + } + self::NEGINTSTART..=self::POSINTEND => { + let (next_bytes, data) = bigint::decode_from(bytes, tag)?; + key.0.push(KeyPart::Int(data)); + next_bytes + } + self::DOUBLE => { + let (next_bytes, data) = double::decode_from(bytes)?; + key.0.push(KeyPart::Float(data)); + next_bytes + } + self::BYTES => { + let (next_bytes, data) = parse_slice(bytes)?; + key.0.push(KeyPart::Bytes(data)); + next_bytes + } + self::FALSE => { + key.0.push(KeyPart::False); + bytes + } + self::TRUE => { + key.0.push(KeyPart::True); + bytes + } + _ => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "invalid tag", + )) + } + }; + + bytes = next_bytes; + } + Ok(key) +} + +fn escape_raw_bytes_into(out: &mut Vec<u8>, x: &[u8]) { + for &b in x { + out.push(b); + if b == 0 { + out.push(ESCAPE); + } + } +} + +mod bigint { + use num_bigint::BigInt; + use num_bigint::Sign; + + use super::parse_byte; + use super::parse_bytes; + const MAX_SZ: usize = 8; + + // Ported from https://github.com/foundationdb-rs/foundationdb-rs/blob/7415e116d5d96c2630976058de28e439eed7e809/foundationdb/src/tuple/pack.rs#L575 + pub fn encode_into(out: &mut Vec<u8>, key: &BigInt) -> std::io::Result<()> { + if key.sign() == Sign::NoSign { + out.push(super::INTZERO); + return Ok(()); + } + let (sign, mut bytes) = key.to_bytes_be(); + let n = bytes.len(); + match sign { + Sign::Minus => { + if n <= MAX_SZ { + out.push(super::INTZERO - n as u8); + } else { + out.extend_from_slice(&[super::NEGINTSTART, bigint_n(n)? ^ 0xff]); + } + invert(&mut bytes); + out.extend_from_slice(&bytes); + } + Sign::NoSign => unreachable!(), + Sign::Plus => { + if n <= MAX_SZ { + out.push(super::INTZERO + n as u8); + } else { + out.extend_from_slice(&[super::POSINTEND, bigint_n(n)?]); + } + out.extend_from_slice(&bytes); + } + } + Ok(()) + } + + pub fn decode_from( + input: &[u8], + tag: u8, + ) -> std::io::Result<(&[u8], BigInt)> { + if super::INTZERO <= tag && tag <= super::INTZERO + MAX_SZ as u8 { + let n = (tag - super::INTZERO) as usize; + let (input, bytes) = parse_bytes(input, n)?; + Ok((input, BigInt::from_bytes_be(Sign::Plus, bytes))) + } else if super::INTZERO - MAX_SZ as u8 <= tag && tag < super::INTZERO { + let n = (super::INTZERO - tag) as usize; + let (input, bytes) = parse_bytes(input, n)?; + Ok((input, BigInt::from_bytes_be(Sign::Minus, &inverted(bytes)))) + } else if tag == super::NEGINTSTART { + let (input, raw_length) = parse_byte(input)?; + let n = usize::from(raw_length ^ 0xff); + let (input, bytes) = parse_bytes(input, n)?; + Ok((input, BigInt::from_bytes_be(Sign::Minus, &inverted(bytes)))) + } else if tag == super::POSINTEND { + let (input, raw_length) = parse_byte(input)?; + let n: usize = usize::from(raw_length); + let (input, bytes) = parse_bytes(input, n)?; + Ok((input, BigInt::from_bytes_be(Sign::Plus, bytes))) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("unknown bigint tag: {}", tag), + )) + } + } + + fn invert(bytes: &mut [u8]) { + // The ones' complement of a binary number is defined as the value + // obtained by inverting all the bits in the binary representation + // of the number (swapping 0s for 1s and vice versa). + for byte in bytes.iter_mut() { + *byte = !*byte; + } + } + + fn inverted(bytes: &[u8]) -> Vec<u8> { + // The ones' complement of a binary number is defined as the value + // obtained by inverting all the bits in the binary representation + // of the number (swapping 0s for 1s and vice versa). + bytes.iter().map(|byte| !*byte).collect() + } + + fn bigint_n(n: usize) -> std::io::Result<u8> { + u8::try_from(n).map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "BigUint requires more than 255 bytes to be represented", + ) + }) + } +} + +mod double { + macro_rules! sign_bit { + ($type:ident) => { + (1 << (std::mem::size_of::<$type>() * 8 - 1)) + }; + } + + fn f64_to_ux_be_bytes(f: f64) -> [u8; 8] { + let u = if f.is_sign_negative() { + f.to_bits() ^ ::std::u64::MAX + } else { + f.to_bits() ^ sign_bit!(u64) + }; + u.to_be_bytes() + } + + pub fn encode_into(out: &mut Vec<u8>, x: f64) { + out.push(super::DOUBLE); + out.extend_from_slice(&f64_to_ux_be_bytes(super::canonicalize_f64(x))); + } + + pub fn decode_from(input: &[u8]) -> std::io::Result<(&[u8], f64)> { + let (input, bytes) = super::parse_bytes(input, 8)?; + let mut arr = [0u8; 8]; + arr.copy_from_slice(bytes); + let u = u64::from_be_bytes(arr); + Ok(( + input, + f64::from_bits(if (u & sign_bit!(u64)) == 0 { + u ^ ::std::u64::MAX + } else { + u ^ sign_bit!(u64) + }), + )) + } +} + +#[inline] +fn parse_bytes(input: &[u8], num: usize) -> std::io::Result<(&[u8], &[u8])> { + if input.len() < num { + Err(std::io::ErrorKind::UnexpectedEof.into()) + } else { + Ok((&input[num..], &input[..num])) + } +} + +#[inline] +fn parse_byte(input: &[u8]) -> std::io::Result<(&[u8], u8)> { + if input.is_empty() { + Err(std::io::ErrorKind::UnexpectedEof.into()) + } else { + Ok((&input[1..], input[0])) + } +} + +fn parse_slice(input: &[u8]) -> std::io::Result<(&[u8], Vec<u8>)> { + let mut output: Vec<u8> = Vec::new(); + let mut i = 0usize; + + while i < input.len() { + let byte = input[i]; + i += 1; + + if byte == 0 { + if input.get(i).copied() == Some(ESCAPE) { + output.push(0); + i += 1; + continue; + } else { + return Ok((&input[i..], output)); + } + } + + output.push(byte); + } + + Err(std::io::ErrorKind::UnexpectedEof.into()) +} + +#[cfg(test)] +mod tests { + use num_bigint::BigInt; + use std::cmp::Ordering; + + use crate::Key; + use crate::KeyPart; + + use super::decode_key; + use super::encode_key; + + fn roundtrip(key: Key) { + let bytes = encode_key(&key).unwrap(); + let decoded = decode_key(&bytes).unwrap(); + assert_eq!(&key, &decoded); + assert_eq!(format!("{:?}", key), format!("{:?}", decoded)); + } + + fn check_order(a: Key, b: Key, expected: Ordering) { + let a_bytes = encode_key(&a).unwrap(); + let b_bytes = encode_key(&b).unwrap(); + + assert_eq!(a.cmp(&b), expected); + assert_eq!(a_bytes.cmp(&b_bytes), expected); + } + + fn check_bijection(key: Key, serialized: &[u8]) { + let bytes = encode_key(&key).unwrap(); + assert_eq!(&bytes[..], serialized); + let decoded = decode_key(serialized).unwrap(); + assert_eq!(&key, &decoded); + } + + #[test] + fn simple_roundtrip() { + roundtrip(Key(vec![ + KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00]), + KeyPart::String("foo".to_string()), + KeyPart::Float(-f64::NAN), + KeyPart::Float(-f64::INFINITY), + KeyPart::Float(-42.1), + KeyPart::Float(-0.0), + KeyPart::Float(0.0), + KeyPart::Float(42.1), + KeyPart::Float(f64::INFINITY), + KeyPart::Float(f64::NAN), + KeyPart::Int(BigInt::from(-10000)), + KeyPart::Int(BigInt::from(-1)), + KeyPart::Int(BigInt::from(0)), + KeyPart::Int(BigInt::from(1)), + KeyPart::Int(BigInt::from(10000)), + KeyPart::False, + KeyPart::True, + ])); + } + + #[test] + #[rustfmt::skip] + fn order_bytes() { + check_order( + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), + Ordering::Equal, + ); + + check_order( + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x01])]), + Ordering::Less, + ); + + check_order( + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x01])]), + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), + Ordering::Greater, + ); + + check_order( + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00, 0x00])]), + Ordering::Less, + ); + + check_order( + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00, 0x00])]), + Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), + Ordering::Greater, + ); + } + + #[test] + #[rustfmt::skip] + fn order_tags() { + check_order( + Key(vec![KeyPart::Bytes(vec![])]), + Key(vec![KeyPart::String("".into())]), + Ordering::Less, + ); + + check_order( + Key(vec![KeyPart::String("".into())]), + Key(vec![KeyPart::Int(BigInt::from(0))]), + Ordering::Less, + ); + + check_order( + Key(vec![KeyPart::Int(BigInt::from(0))]), + Key(vec![KeyPart::Float(0.0)]), + Ordering::Less, + ); + + check_order( + Key(vec![KeyPart::Float(0.0)]), + Key(vec![KeyPart::False]), + Ordering::Less, + ); + + check_order( + Key(vec![KeyPart::False]), + Key(vec![KeyPart::True]), + Ordering::Less, + ); + + check_order( + Key(vec![KeyPart::True]), + Key(vec![KeyPart::Bytes(vec![])]), + Ordering::Greater, + ); + } + + #[test] + #[rustfmt::skip] + fn order_floats() { + check_order( + Key(vec![KeyPart::Float(-f64::NAN)]), + Key(vec![KeyPart::Float(-f64::INFINITY)]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Float(-f64::INFINITY)]), + Key(vec![KeyPart::Float(-10.0)]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Float(-10.0)]), + Key(vec![KeyPart::Float(-0.0)]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Float(-0.0)]), + Key(vec![KeyPart::Float(0.0)]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Float(0.0)]), + Key(vec![KeyPart::Float(10.0)]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Float(10.0)]), + Key(vec![KeyPart::Float(f64::INFINITY)]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Float(f64::INFINITY)]), + Key(vec![KeyPart::Float(f64::NAN)]), + Ordering::Less, + ); + } + + #[test] + #[rustfmt::skip] + fn order_ints() { + check_order( + Key(vec![KeyPart::Int(BigInt::from(-10000))]), + Key(vec![KeyPart::Int(BigInt::from(-100))]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Int(BigInt::from(-100))]), + Key(vec![KeyPart::Int(BigInt::from(-1))]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Int(BigInt::from(-1))]), + Key(vec![KeyPart::Int(BigInt::from(0))]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Int(BigInt::from(0))]), + Key(vec![KeyPart::Int(BigInt::from(1))]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Int(BigInt::from(1))]), + Key(vec![KeyPart::Int(BigInt::from(100))]), + Ordering::Less, + ); + check_order( + Key(vec![KeyPart::Int(BigInt::from(100))]), + Key(vec![KeyPart::Int(BigInt::from(10000))]), + Ordering::Less, + ); + } + + #[test] + #[rustfmt::skip] + fn float_canonicalization() { + let key1 = Key(vec![KeyPart::Float(f64::from_bits(0x7ff8000000000001))]); + let key2 = Key(vec![KeyPart::Float(f64::from_bits(0x7ff8000000000002))]); + + assert_eq!(key1, key2); + assert_eq!(encode_key(&key1).unwrap(), encode_key(&key2).unwrap()); + } + + #[test] + #[rustfmt::skip] + fn explicit_bijection() { + // string + check_bijection( + Key(vec![KeyPart::String("hello".into())]), + &[0x02, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00], + ); + + // zero byte escape + check_bijection( + Key(vec![KeyPart::Bytes(vec![0x01, 0x02, 0x00, 0x07, 0x08])]), + &[0x01, 0x01, 0x02, 0x00, 0xff, 0x07, 0x08, 0x00], + ); + + // array + check_bijection( + Key(vec![ + KeyPart::String("hello".into()), + KeyPart::Bytes(vec![0x01, 0x02, 0x00, 0x07, 0x08]), + ]), + &[ + 0x02, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00, /* string */ + 0x01, 0x01, 0x02, 0x00, 0xff, 0x07, 0x08, 0x00, /* bytes */ + ], + ); + } +} |