summaryrefslogtreecommitdiff
path: root/ext/kv/codec.rs
diff options
context:
space:
mode:
Diffstat (limited to 'ext/kv/codec.rs')
-rw-r--r--ext/kv/codec.rs559
1 files changed, 559 insertions, 0 deletions
diff --git a/ext/kv/codec.rs b/ext/kv/codec.rs
new file mode 100644
index 000000000..b2acfdbc2
--- /dev/null
+++ b/ext/kv/codec.rs
@@ -0,0 +1,559 @@
+// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
+// Ported from https://github.com/foundationdb-rs/foundationdb-rs/blob/main/foundationdb/src/tuple/pack.rs
+
+use crate::Key;
+use crate::KeyPart;
+
+//const NIL: u8 = 0x00;
+const BYTES: u8 = 0x01;
+const STRING: u8 = 0x02;
+//const NESTED: u8 = 0x05;
+const NEGINTSTART: u8 = 0x0b;
+const INTZERO: u8 = 0x14;
+const POSINTEND: u8 = 0x1d;
+//const FLOAT: u8 = 0x20;
+const DOUBLE: u8 = 0x21;
+const FALSE: u8 = 0x26;
+const TRUE: u8 = 0x27;
+
+const ESCAPE: u8 = 0xff;
+
+const CANONICAL_NAN_POS: u64 = 0x7ff8000000000000u64;
+const CANONICAL_NAN_NEG: u64 = 0xfff8000000000000u64;
+
+pub fn canonicalize_f64(n: f64) -> f64 {
+ if n.is_nan() {
+ if n.is_sign_negative() {
+ f64::from_bits(CANONICAL_NAN_NEG)
+ } else {
+ f64::from_bits(CANONICAL_NAN_POS)
+ }
+ } else {
+ n
+ }
+}
+
+pub fn encode_key(key: &Key) -> std::io::Result<Vec<u8>> {
+ // Disallow empty key
+ if key.0.is_empty() {
+ return Err(std::io::Error::new(
+ std::io::ErrorKind::InvalidInput,
+ "key should not be empty",
+ ));
+ }
+
+ let mut output: Vec<u8> = vec![];
+ for part in &key.0 {
+ match part {
+ KeyPart::String(key) => {
+ output.push(STRING);
+ escape_raw_bytes_into(&mut output, key.as_bytes());
+ output.push(0);
+ }
+ KeyPart::Int(key) => {
+ bigint::encode_into(&mut output, key)?;
+ }
+ KeyPart::Float(key) => {
+ double::encode_into(&mut output, *key);
+ }
+ KeyPart::Bytes(key) => {
+ output.push(BYTES);
+ escape_raw_bytes_into(&mut output, key);
+ output.push(0);
+ }
+ KeyPart::False => {
+ output.push(FALSE);
+ }
+ KeyPart::True => {
+ output.push(TRUE);
+ }
+ }
+ }
+ Ok(output)
+}
+
+pub fn decode_key(mut bytes: &[u8]) -> std::io::Result<Key> {
+ // Disallow empty key
+ if bytes.is_empty() {
+ return Err(std::io::Error::new(
+ std::io::ErrorKind::InvalidInput,
+ "key should not be empty",
+ ));
+ }
+
+ let mut key = Key(vec![]);
+ while !bytes.is_empty() {
+ let tag = bytes[0];
+ bytes = &bytes[1..];
+
+ let next_bytes = match tag {
+ self::STRING => {
+ let (next_bytes, data) = parse_slice(bytes)?;
+ let data = String::from_utf8(data).map_err(|_| {
+ std::io::Error::new(std::io::ErrorKind::InvalidData, "invalid utf8")
+ })?;
+ key.0.push(KeyPart::String(data));
+ next_bytes
+ }
+ self::NEGINTSTART..=self::POSINTEND => {
+ let (next_bytes, data) = bigint::decode_from(bytes, tag)?;
+ key.0.push(KeyPart::Int(data));
+ next_bytes
+ }
+ self::DOUBLE => {
+ let (next_bytes, data) = double::decode_from(bytes)?;
+ key.0.push(KeyPart::Float(data));
+ next_bytes
+ }
+ self::BYTES => {
+ let (next_bytes, data) = parse_slice(bytes)?;
+ key.0.push(KeyPart::Bytes(data));
+ next_bytes
+ }
+ self::FALSE => {
+ key.0.push(KeyPart::False);
+ bytes
+ }
+ self::TRUE => {
+ key.0.push(KeyPart::True);
+ bytes
+ }
+ _ => {
+ return Err(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ "invalid tag",
+ ))
+ }
+ };
+
+ bytes = next_bytes;
+ }
+ Ok(key)
+}
+
+fn escape_raw_bytes_into(out: &mut Vec<u8>, x: &[u8]) {
+ for &b in x {
+ out.push(b);
+ if b == 0 {
+ out.push(ESCAPE);
+ }
+ }
+}
+
+mod bigint {
+ use num_bigint::BigInt;
+ use num_bigint::Sign;
+
+ use super::parse_byte;
+ use super::parse_bytes;
+ const MAX_SZ: usize = 8;
+
+ // Ported from https://github.com/foundationdb-rs/foundationdb-rs/blob/7415e116d5d96c2630976058de28e439eed7e809/foundationdb/src/tuple/pack.rs#L575
+ pub fn encode_into(out: &mut Vec<u8>, key: &BigInt) -> std::io::Result<()> {
+ if key.sign() == Sign::NoSign {
+ out.push(super::INTZERO);
+ return Ok(());
+ }
+ let (sign, mut bytes) = key.to_bytes_be();
+ let n = bytes.len();
+ match sign {
+ Sign::Minus => {
+ if n <= MAX_SZ {
+ out.push(super::INTZERO - n as u8);
+ } else {
+ out.extend_from_slice(&[super::NEGINTSTART, bigint_n(n)? ^ 0xff]);
+ }
+ invert(&mut bytes);
+ out.extend_from_slice(&bytes);
+ }
+ Sign::NoSign => unreachable!(),
+ Sign::Plus => {
+ if n <= MAX_SZ {
+ out.push(super::INTZERO + n as u8);
+ } else {
+ out.extend_from_slice(&[super::POSINTEND, bigint_n(n)?]);
+ }
+ out.extend_from_slice(&bytes);
+ }
+ }
+ Ok(())
+ }
+
+ pub fn decode_from(
+ input: &[u8],
+ tag: u8,
+ ) -> std::io::Result<(&[u8], BigInt)> {
+ if super::INTZERO <= tag && tag <= super::INTZERO + MAX_SZ as u8 {
+ let n = (tag - super::INTZERO) as usize;
+ let (input, bytes) = parse_bytes(input, n)?;
+ Ok((input, BigInt::from_bytes_be(Sign::Plus, bytes)))
+ } else if super::INTZERO - MAX_SZ as u8 <= tag && tag < super::INTZERO {
+ let n = (super::INTZERO - tag) as usize;
+ let (input, bytes) = parse_bytes(input, n)?;
+ Ok((input, BigInt::from_bytes_be(Sign::Minus, &inverted(bytes))))
+ } else if tag == super::NEGINTSTART {
+ let (input, raw_length) = parse_byte(input)?;
+ let n = usize::from(raw_length ^ 0xff);
+ let (input, bytes) = parse_bytes(input, n)?;
+ Ok((input, BigInt::from_bytes_be(Sign::Minus, &inverted(bytes))))
+ } else if tag == super::POSINTEND {
+ let (input, raw_length) = parse_byte(input)?;
+ let n: usize = usize::from(raw_length);
+ let (input, bytes) = parse_bytes(input, n)?;
+ Ok((input, BigInt::from_bytes_be(Sign::Plus, bytes)))
+ } else {
+ Err(std::io::Error::new(
+ std::io::ErrorKind::InvalidData,
+ format!("unknown bigint tag: {}", tag),
+ ))
+ }
+ }
+
+ fn invert(bytes: &mut [u8]) {
+ // The ones' complement of a binary number is defined as the value
+ // obtained by inverting all the bits in the binary representation
+ // of the number (swapping 0s for 1s and vice versa).
+ for byte in bytes.iter_mut() {
+ *byte = !*byte;
+ }
+ }
+
+ fn inverted(bytes: &[u8]) -> Vec<u8> {
+ // The ones' complement of a binary number is defined as the value
+ // obtained by inverting all the bits in the binary representation
+ // of the number (swapping 0s for 1s and vice versa).
+ bytes.iter().map(|byte| !*byte).collect()
+ }
+
+ fn bigint_n(n: usize) -> std::io::Result<u8> {
+ u8::try_from(n).map_err(|_| {
+ std::io::Error::new(
+ std::io::ErrorKind::InvalidInput,
+ "BigUint requires more than 255 bytes to be represented",
+ )
+ })
+ }
+}
+
+mod double {
+ macro_rules! sign_bit {
+ ($type:ident) => {
+ (1 << (std::mem::size_of::<$type>() * 8 - 1))
+ };
+ }
+
+ fn f64_to_ux_be_bytes(f: f64) -> [u8; 8] {
+ let u = if f.is_sign_negative() {
+ f.to_bits() ^ ::std::u64::MAX
+ } else {
+ f.to_bits() ^ sign_bit!(u64)
+ };
+ u.to_be_bytes()
+ }
+
+ pub fn encode_into(out: &mut Vec<u8>, x: f64) {
+ out.push(super::DOUBLE);
+ out.extend_from_slice(&f64_to_ux_be_bytes(super::canonicalize_f64(x)));
+ }
+
+ pub fn decode_from(input: &[u8]) -> std::io::Result<(&[u8], f64)> {
+ let (input, bytes) = super::parse_bytes(input, 8)?;
+ let mut arr = [0u8; 8];
+ arr.copy_from_slice(bytes);
+ let u = u64::from_be_bytes(arr);
+ Ok((
+ input,
+ f64::from_bits(if (u & sign_bit!(u64)) == 0 {
+ u ^ ::std::u64::MAX
+ } else {
+ u ^ sign_bit!(u64)
+ }),
+ ))
+ }
+}
+
+#[inline]
+fn parse_bytes(input: &[u8], num: usize) -> std::io::Result<(&[u8], &[u8])> {
+ if input.len() < num {
+ Err(std::io::ErrorKind::UnexpectedEof.into())
+ } else {
+ Ok((&input[num..], &input[..num]))
+ }
+}
+
+#[inline]
+fn parse_byte(input: &[u8]) -> std::io::Result<(&[u8], u8)> {
+ if input.is_empty() {
+ Err(std::io::ErrorKind::UnexpectedEof.into())
+ } else {
+ Ok((&input[1..], input[0]))
+ }
+}
+
+fn parse_slice(input: &[u8]) -> std::io::Result<(&[u8], Vec<u8>)> {
+ let mut output: Vec<u8> = Vec::new();
+ let mut i = 0usize;
+
+ while i < input.len() {
+ let byte = input[i];
+ i += 1;
+
+ if byte == 0 {
+ if input.get(i).copied() == Some(ESCAPE) {
+ output.push(0);
+ i += 1;
+ continue;
+ } else {
+ return Ok((&input[i..], output));
+ }
+ }
+
+ output.push(byte);
+ }
+
+ Err(std::io::ErrorKind::UnexpectedEof.into())
+}
+
+#[cfg(test)]
+mod tests {
+ use num_bigint::BigInt;
+ use std::cmp::Ordering;
+
+ use crate::Key;
+ use crate::KeyPart;
+
+ use super::decode_key;
+ use super::encode_key;
+
+ fn roundtrip(key: Key) {
+ let bytes = encode_key(&key).unwrap();
+ let decoded = decode_key(&bytes).unwrap();
+ assert_eq!(&key, &decoded);
+ assert_eq!(format!("{:?}", key), format!("{:?}", decoded));
+ }
+
+ fn check_order(a: Key, b: Key, expected: Ordering) {
+ let a_bytes = encode_key(&a).unwrap();
+ let b_bytes = encode_key(&b).unwrap();
+
+ assert_eq!(a.cmp(&b), expected);
+ assert_eq!(a_bytes.cmp(&b_bytes), expected);
+ }
+
+ fn check_bijection(key: Key, serialized: &[u8]) {
+ let bytes = encode_key(&key).unwrap();
+ assert_eq!(&bytes[..], serialized);
+ let decoded = decode_key(serialized).unwrap();
+ assert_eq!(&key, &decoded);
+ }
+
+ #[test]
+ fn simple_roundtrip() {
+ roundtrip(Key(vec![
+ KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00]),
+ KeyPart::String("foo".to_string()),
+ KeyPart::Float(-f64::NAN),
+ KeyPart::Float(-f64::INFINITY),
+ KeyPart::Float(-42.1),
+ KeyPart::Float(-0.0),
+ KeyPart::Float(0.0),
+ KeyPart::Float(42.1),
+ KeyPart::Float(f64::INFINITY),
+ KeyPart::Float(f64::NAN),
+ KeyPart::Int(BigInt::from(-10000)),
+ KeyPart::Int(BigInt::from(-1)),
+ KeyPart::Int(BigInt::from(0)),
+ KeyPart::Int(BigInt::from(1)),
+ KeyPart::Int(BigInt::from(10000)),
+ KeyPart::False,
+ KeyPart::True,
+ ]));
+ }
+
+ #[test]
+ #[rustfmt::skip]
+ fn order_bytes() {
+ check_order(
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
+ Ordering::Equal,
+ );
+
+ check_order(
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x01])]),
+ Ordering::Less,
+ );
+
+ check_order(
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x01])]),
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
+ Ordering::Greater,
+ );
+
+ check_order(
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00, 0x00])]),
+ Ordering::Less,
+ );
+
+ check_order(
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00, 0x00])]),
+ Key(vec![KeyPart::Bytes(vec![0, 1, 2, 3, 0xff, 0x00, 0xff, 0x00])]),
+ Ordering::Greater,
+ );
+ }
+
+ #[test]
+ #[rustfmt::skip]
+ fn order_tags() {
+ check_order(
+ Key(vec![KeyPart::Bytes(vec![])]),
+ Key(vec![KeyPart::String("".into())]),
+ Ordering::Less,
+ );
+
+ check_order(
+ Key(vec![KeyPart::String("".into())]),
+ Key(vec![KeyPart::Int(BigInt::from(0))]),
+ Ordering::Less,
+ );
+
+ check_order(
+ Key(vec![KeyPart::Int(BigInt::from(0))]),
+ Key(vec![KeyPart::Float(0.0)]),
+ Ordering::Less,
+ );
+
+ check_order(
+ Key(vec![KeyPart::Float(0.0)]),
+ Key(vec![KeyPart::False]),
+ Ordering::Less,
+ );
+
+ check_order(
+ Key(vec![KeyPart::False]),
+ Key(vec![KeyPart::True]),
+ Ordering::Less,
+ );
+
+ check_order(
+ Key(vec![KeyPart::True]),
+ Key(vec![KeyPart::Bytes(vec![])]),
+ Ordering::Greater,
+ );
+ }
+
+ #[test]
+ #[rustfmt::skip]
+ fn order_floats() {
+ check_order(
+ Key(vec![KeyPart::Float(-f64::NAN)]),
+ Key(vec![KeyPart::Float(-f64::INFINITY)]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Float(-f64::INFINITY)]),
+ Key(vec![KeyPart::Float(-10.0)]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Float(-10.0)]),
+ Key(vec![KeyPart::Float(-0.0)]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Float(-0.0)]),
+ Key(vec![KeyPart::Float(0.0)]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Float(0.0)]),
+ Key(vec![KeyPart::Float(10.0)]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Float(10.0)]),
+ Key(vec![KeyPart::Float(f64::INFINITY)]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Float(f64::INFINITY)]),
+ Key(vec![KeyPart::Float(f64::NAN)]),
+ Ordering::Less,
+ );
+ }
+
+ #[test]
+ #[rustfmt::skip]
+ fn order_ints() {
+ check_order(
+ Key(vec![KeyPart::Int(BigInt::from(-10000))]),
+ Key(vec![KeyPart::Int(BigInt::from(-100))]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Int(BigInt::from(-100))]),
+ Key(vec![KeyPart::Int(BigInt::from(-1))]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Int(BigInt::from(-1))]),
+ Key(vec![KeyPart::Int(BigInt::from(0))]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Int(BigInt::from(0))]),
+ Key(vec![KeyPart::Int(BigInt::from(1))]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Int(BigInt::from(1))]),
+ Key(vec![KeyPart::Int(BigInt::from(100))]),
+ Ordering::Less,
+ );
+ check_order(
+ Key(vec![KeyPart::Int(BigInt::from(100))]),
+ Key(vec![KeyPart::Int(BigInt::from(10000))]),
+ Ordering::Less,
+ );
+ }
+
+ #[test]
+ #[rustfmt::skip]
+ fn float_canonicalization() {
+ let key1 = Key(vec![KeyPart::Float(f64::from_bits(0x7ff8000000000001))]);
+ let key2 = Key(vec![KeyPart::Float(f64::from_bits(0x7ff8000000000002))]);
+
+ assert_eq!(key1, key2);
+ assert_eq!(encode_key(&key1).unwrap(), encode_key(&key2).unwrap());
+ }
+
+ #[test]
+ #[rustfmt::skip]
+ fn explicit_bijection() {
+ // string
+ check_bijection(
+ Key(vec![KeyPart::String("hello".into())]),
+ &[0x02, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00],
+ );
+
+ // zero byte escape
+ check_bijection(
+ Key(vec![KeyPart::Bytes(vec![0x01, 0x02, 0x00, 0x07, 0x08])]),
+ &[0x01, 0x01, 0x02, 0x00, 0xff, 0x07, 0x08, 0x00],
+ );
+
+ // array
+ check_bijection(
+ Key(vec![
+ KeyPart::String("hello".into()),
+ KeyPart::Bytes(vec![0x01, 0x02, 0x00, 0x07, 0x08]),
+ ]),
+ &[
+ 0x02, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00, /* string */
+ 0x01, 0x01, 0x02, 0x00, 0xff, 0x07, 0x08, 0x00, /* bytes */
+ ],
+ );
+ }
+}