diff options
Diffstat (limited to 'ext/kv/codec.rs')
-rw-r--r-- | ext/kv/codec.rs | 543 |
1 files changed, 0 insertions, 543 deletions
diff --git a/ext/kv/codec.rs b/ext/kv/codec.rs deleted file mode 100644 index 522c2e9bc..000000000 --- a/ext/kv/codec.rs +++ /dev/null @@ -1,543 +0,0 @@ -// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license. -// Ported from https://github.com/foundationdb-rs/foundationdb-rs/blob/main/foundationdb/src/tuple/pack.rs - -use crate::Key; -use crate::KeyPart; - -//const NIL: u8 = 0x00; -const BYTES: u8 = 0x01; -const STRING: u8 = 0x02; -//const NESTED: u8 = 0x05; -const NEGINTSTART: u8 = 0x0b; -const INTZERO: u8 = 0x14; -const POSINTEND: u8 = 0x1d; -//const FLOAT: u8 = 0x20; -const DOUBLE: u8 = 0x21; -const FALSE: u8 = 0x26; -const TRUE: u8 = 0x27; - -const ESCAPE: u8 = 0xff; - -const CANONICAL_NAN_POS: u64 = 0x7ff8000000000000u64; -const CANONICAL_NAN_NEG: u64 = 0xfff8000000000000u64; - -pub fn canonicalize_f64(n: f64) -> f64 { - if n.is_nan() { - if n.is_sign_negative() { - f64::from_bits(CANONICAL_NAN_NEG) - } else { - f64::from_bits(CANONICAL_NAN_POS) - } - } else { - n - } -} - -pub fn encode_key(key: &Key) -> std::io::Result<Vec<u8>> { - let mut output: Vec<u8> = vec![]; - for part in &key.0 { - match part { - KeyPart::String(key) => { - output.push(STRING); - escape_raw_bytes_into(&mut output, key.as_bytes()); - output.push(0); - } - KeyPart::Int(key) => { - bigint::encode_into(&mut output, key)?; - } - KeyPart::Float(key) => { - double::encode_into(&mut output, *key); - } - KeyPart::Bytes(key) => { - output.push(BYTES); - escape_raw_bytes_into(&mut output, key); - output.push(0); - } - KeyPart::False => { - output.push(FALSE); - } - KeyPart::True => { - output.push(TRUE); - } - } - } - Ok(output) -} - -pub fn decode_key(mut bytes: &[u8]) -> std::io::Result<Key> { - let mut key = Key(vec![]); - while !bytes.is_empty() { - let tag = bytes[0]; - bytes = &bytes[1..]; - - let next_bytes = match tag { - self::STRING => { - let (next_bytes, data) = parse_slice(bytes)?; - let data = String::from_utf8(data).map_err(|_| { - std::io::Error::new(std::io::ErrorKind::InvalidData, "invalid utf8") - })?; - key.0.push(KeyPart::String(data)); - next_bytes - } - self::NEGINTSTART..=self::POSINTEND => { - let (next_bytes, data) = bigint::decode_from(bytes, tag)?; - key.0.push(KeyPart::Int(data)); - next_bytes - } - self::DOUBLE => { - let (next_bytes, data) = double::decode_from(bytes)?; - key.0.push(KeyPart::Float(data)); - next_bytes - } - self::BYTES => { - let (next_bytes, data) = parse_slice(bytes)?; - key.0.push(KeyPart::Bytes(data)); - next_bytes - } - self::FALSE => { - key.0.push(KeyPart::False); - bytes - } - self::TRUE => { - key.0.push(KeyPart::True); - bytes - } - _ => { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "invalid tag", - )) - } - }; - - bytes = next_bytes; - } - Ok(key) -} - -fn escape_raw_bytes_into(out: &mut Vec<u8>, x: &[u8]) { - for &b in x { - out.push(b); - if b == 0 { - out.push(ESCAPE); - } - } -} - -mod bigint { - use num_bigint::BigInt; - use num_bigint::Sign; - - use super::parse_byte; - use super::parse_bytes; - const MAX_SZ: usize = 8; - - // Ported from https://github.com/foundationdb-rs/foundationdb-rs/blob/7415e116d5d96c2630976058de28e439eed7e809/foundationdb/src/tuple/pack.rs#L575 - pub fn encode_into(out: &mut Vec<u8>, key: &BigInt) -> std::io::Result<()> { - if key.sign() == Sign::NoSign { - out.push(super::INTZERO); - return Ok(()); - } - let (sign, mut bytes) = key.to_bytes_be(); - let n = bytes.len(); - match sign { - Sign::Minus => { - if n <= MAX_SZ { - out.push(super::INTZERO - n as u8); - } else { - out.extend_from_slice(&[super::NEGINTSTART, bigint_n(n)? ^ 0xff]); - } - invert(&mut bytes); - out.extend_from_slice(&bytes); - } - Sign::NoSign => unreachable!(), - Sign::Plus => { - if n <= MAX_SZ { - out.push(super::INTZERO + n as u8); - } else { - out.extend_from_slice(&[super::POSINTEND, bigint_n(n)?]); - } - out.extend_from_slice(&bytes); - } - } - Ok(()) - } - - pub fn decode_from( - input: &[u8], - tag: u8, - ) -> std::io::Result<(&[u8], BigInt)> { - if super::INTZERO <= tag && tag <= super::INTZERO + MAX_SZ as u8 { - let n = (tag - super::INTZERO) as usize; - let (input, bytes) = parse_bytes(input, n)?; - Ok((input, BigInt::from_bytes_be(Sign::Plus, bytes))) - } else if super::INTZERO - MAX_SZ as u8 <= tag && tag < super::INTZERO { - let n = (super::INTZERO - tag) as usize; - let (input, bytes) = parse_bytes(input, n)?; - Ok((input, BigInt::from_bytes_be(Sign::Minus, &inverted(bytes)))) - } else if tag == super::NEGINTSTART { - let (input, raw_length) = parse_byte(input)?; - let n = usize::from(raw_length ^ 0xff); - let (input, bytes) = parse_bytes(input, n)?; - Ok((input, BigInt::from_bytes_be(Sign::Minus, &inverted(bytes)))) - } else if tag == super::POSINTEND { - let (input, raw_length) = parse_byte(input)?; - let n: usize = usize::from(raw_length); - let (input, bytes) = parse_bytes(input, n)?; - Ok((input, BigInt::from_bytes_be(Sign::Plus, bytes))) - } else { - Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("unknown bigint tag: {}", tag), - )) - } - } - - fn invert(bytes: &mut [u8]) { - // The ones' complement of a binary number is defined as the value - // obtained by inverting all the bits in the binary representation - // of the number (swapping 0s for 1s and vice versa). - for byte in bytes.iter_mut() { - *byte = !*byte; - } - } - - fn inverted(bytes: &[u8]) -> Vec<u8> { - // The ones' complement of a binary number is defined as the value - // obtained by inverting all the bits in the binary representation - // of the number (swapping 0s for 1s and vice versa). - bytes.iter().map(|byte| !*byte).collect() - } - - fn bigint_n(n: usize) -> std::io::Result<u8> { - u8::try_from(n).map_err(|_| { - std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "BigUint requires more than 255 bytes to be represented", - ) - }) - } -} - -mod double { - macro_rules! sign_bit { - ($type:ident) => { - (1 << (std::mem::size_of::<$type>() * 8 - 1)) - }; - } - - fn f64_to_ux_be_bytes(f: f64) -> [u8; 8] { - let u = if f.is_sign_negative() { - f.to_bits() ^ ::std::u64::MAX - } else { - f.to_bits() ^ sign_bit!(u64) - }; - u.to_be_bytes() - } - - pub fn encode_into(out: &mut Vec<u8>, x: f64) { - out.push(super::DOUBLE); - out.extend_from_slice(&f64_to_ux_be_bytes(super::canonicalize_f64(x))); - } - - pub fn decode_from(input: &[u8]) -> std::io::Result<(&[u8], f64)> { - let (input, bytes) = super::parse_bytes(input, 8)?; - let mut arr = [0u8; 8]; - arr.copy_from_slice(bytes); - let u = u64::from_be_bytes(arr); - Ok(( - input, - f64::from_bits(if (u & sign_bit!(u64)) == 0 { - u ^ ::std::u64::MAX - } else { - u ^ sign_bit!(u64) - }), - )) - } -} - -#[inline] -fn parse_bytes(input: &[u8], num: usize) -> std::io::Result<(&[u8], &[u8])> { - if input.len() < num { - Err(std::io::ErrorKind::UnexpectedEof.into()) - } else { - Ok((&input[num..], &input[..num])) - } -} - -#[inline] -fn parse_byte(input: &[u8]) -> std::io::Result<(&[u8], u8)> { - if input.is_empty() { - Err(std::io::ErrorKind::UnexpectedEof.into()) - } else { - Ok((&input[1..], input[0])) - } -} - -fn parse_slice(input: &[u8]) -> std::io::Result<(&[u8], Vec<u8>)> { - let mut output: Vec<u8> = Vec::new(); - let mut i = 0usize; - - while i < input.len() { - let byte = input[i]; - i += 1; - - if byte == 0 { - if input.get(i).copied() == Some(ESCAPE) { - output.push(0); - i += 1; - continue; - } else { - return Ok((&input[i..], output)); - } - } - - output.push(byte); - } - - Err(std::io::ErrorKind::UnexpectedEof.into()) -} - -#[cfg(test)] -mod tests { - use num_bigint::BigInt; - use std::cmp::Ordering; - - use crate::Key; - use crate::KeyPart; - - use super::decode_key; - use super::encode_key; - - fn roundtrip(key: Key) { - let bytes = encode_key(&key).unwrap(); - let decoded = decode_key(&bytes).unwrap(); - assert_eq!(&key, &decoded); - assert_eq!(format!("{:?}", key), format!("{:?}", decoded)); - } - - fn check_order(a: Key, b: Key, expected: Ordering) { - let a_bytes = encode_key(&a).unwrap(); - let b_bytes = encode_key(&b).unwrap(); - - assert_eq!(a.cmp(&b), expected); - assert_eq!(a_bytes.cmp(&b_bytes), expected); - } - - fn check_bijection(key: Key, serialized: &[u8]) { - let bytes = encode_key(&key).unwrap(); - assert_eq!(&bytes[..], serialized); - let decoded = decode_key(serialized).unwrap(); - assert_eq!(&key, &decoded); - } - - #[test] - fn simple_roundtrip() { - roundtrip(Key(vec![ - KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00]), - KeyPart::String("foo".to_string()), - KeyPart::Float(-f64::NAN), - KeyPart::Float(-f64::INFINITY), - KeyPart::Float(-42.1), - KeyPart::Float(-0.0), - KeyPart::Float(0.0), - KeyPart::Float(42.1), - KeyPart::Float(f64::INFINITY), - KeyPart::Float(f64::NAN), - KeyPart::Int(BigInt::from(-10000)), - KeyPart::Int(BigInt::from(-1)), - KeyPart::Int(BigInt::from(0)), - KeyPart::Int(BigInt::from(1)), - KeyPart::Int(BigInt::from(10000)), - KeyPart::False, - KeyPart::True, - ])); - } - - #[test] - #[rustfmt::skip] - fn order_bytes() { - check_order( - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), - Ordering::Equal, - ); - - check_order( - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x01])]), - Ordering::Less, - ); - - check_order( - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x01])]), - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), - Ordering::Greater, - ); - - check_order( - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00, 0x00])]), - Ordering::Less, - ); - - check_order( - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00, 0x00])]), - Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]), - Ordering::Greater, - ); - } - - #[test] - #[rustfmt::skip] - fn order_tags() { - check_order( - Key(vec![KeyPart::Bytes(vec![])]), - Key(vec![KeyPart::String("".into())]), - Ordering::Less, - ); - - check_order( - Key(vec![KeyPart::String("".into())]), - Key(vec![KeyPart::Int(BigInt::from(0))]), - Ordering::Less, - ); - - check_order( - Key(vec![KeyPart::Int(BigInt::from(0))]), - Key(vec![KeyPart::Float(0.0)]), - Ordering::Less, - ); - - check_order( - Key(vec![KeyPart::Float(0.0)]), - Key(vec![KeyPart::False]), - Ordering::Less, - ); - - check_order( - Key(vec![KeyPart::False]), - Key(vec![KeyPart::True]), - Ordering::Less, - ); - - check_order( - Key(vec![KeyPart::True]), - Key(vec![KeyPart::Bytes(vec![])]), - Ordering::Greater, - ); - } - - #[test] - #[rustfmt::skip] - fn order_floats() { - check_order( - Key(vec![KeyPart::Float(-f64::NAN)]), - Key(vec![KeyPart::Float(-f64::INFINITY)]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Float(-f64::INFINITY)]), - Key(vec![KeyPart::Float(-10.0)]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Float(-10.0)]), - Key(vec![KeyPart::Float(-0.0)]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Float(-0.0)]), - Key(vec![KeyPart::Float(0.0)]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Float(0.0)]), - Key(vec![KeyPart::Float(10.0)]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Float(10.0)]), - Key(vec![KeyPart::Float(f64::INFINITY)]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Float(f64::INFINITY)]), - Key(vec![KeyPart::Float(f64::NAN)]), - Ordering::Less, - ); - } - - #[test] - #[rustfmt::skip] - fn order_ints() { - check_order( - Key(vec![KeyPart::Int(BigInt::from(-10000))]), - Key(vec![KeyPart::Int(BigInt::from(-100))]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Int(BigInt::from(-100))]), - Key(vec![KeyPart::Int(BigInt::from(-1))]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Int(BigInt::from(-1))]), - Key(vec![KeyPart::Int(BigInt::from(0))]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Int(BigInt::from(0))]), - Key(vec![KeyPart::Int(BigInt::from(1))]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Int(BigInt::from(1))]), - Key(vec![KeyPart::Int(BigInt::from(100))]), - Ordering::Less, - ); - check_order( - Key(vec![KeyPart::Int(BigInt::from(100))]), - Key(vec![KeyPart::Int(BigInt::from(10000))]), - Ordering::Less, - ); - } - - #[test] - #[rustfmt::skip] - fn float_canonicalization() { - let key1 = Key(vec![KeyPart::Float(f64::from_bits(0x7ff8000000000001))]); - let key2 = Key(vec![KeyPart::Float(f64::from_bits(0x7ff8000000000002))]); - - assert_eq!(key1, key2); - assert_eq!(encode_key(&key1).unwrap(), encode_key(&key2).unwrap()); - } - - #[test] - #[rustfmt::skip] - fn explicit_bijection() { - // string - check_bijection( - Key(vec![KeyPart::String("hello".into())]), - &[0x02, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00], - ); - - // zero byte escape - check_bijection( - Key(vec![KeyPart::Bytes(vec![0x01, 0x02, 0x00, 0x07, 0x08])]), - &[0x01, 0x01, 0x02, 0x00, 0xff, 0x07, 0x08, 0x00], - ); - - // array - check_bijection( - Key(vec![ - KeyPart::String("hello".into()), - KeyPart::Bytes(vec![0x01, 0x02, 0x00, 0x07, 0x08]), - ]), - &[ - 0x02, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00, /* string */ - 0x01, 0x01, 0x02, 0x00, 0xff, 0x07, 0x08, 0x00, /* bytes */ - ], - ); - } -} |