fix(ext/node): Match punycode module behavior to node (#22847)

Fixes #19214. We were using the `idna` crate to implement our polyfill for `punycode.toASCII` and `punycode.toUnicode`. The `idna` crate is correct and adheres to the IDNA2003/2008 specs, but it turns out that `node`'s implementations don't really follow any spec! Instead, node splits the domain on `'.'` and punycode-encodes/decodes each label individually. This means that node's implementations will happily work on code points that are disallowed by the IDNA specs, which is what caused the error in #19214. While fixing this, I went ahead and matched node's behavior for all of the punycode functions and enabled node's punycode test in our `node_compat` suite.
author: Nathan Whitaker <17734409+nathanwhit@users.noreply.github.com> 2024-03-11 15:49:43 -0700
committer: GitHub <noreply@github.com> 2024-03-11 15:49:43 -0700
commit: a77b2987bc90879af30a39ba274df9061cc7fbae (patch)
tree: ad7463374e66eb3aa61e41d96c512e67e717e349 /ext/node/ops/idna.rs
parent: d69aab62b0789dd54b8c09b54af022a38f060b5b (diff)
1 files changed, 136 insertions, 5 deletions
diff --git a/ext/node/ops/idna.rs b/ext/node/ops/idna.rs
index 884e812cc..9c9450c70 100644
--- a/ext/node/ops/idna.rs
+++ b/ext/node/ops/idna.rs
@@ -1,16 +1,126 @@
 // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
 
-use deno_core::error::AnyError;
+use deno_core::anyhow::Error;
+use deno_core::error::range_error;
 use deno_core::op2;
 
+use std::borrow::Cow;
+
+// map_domain, to_ascii and to_unicode are based on the punycode implementation in node.js
+// https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js
+
+const PUNY_PREFIX: &str = "xn--";
+
+fn invalid_input_err() -> Error {
+  range_error("Invalid input")
+}
+
+fn not_basic_err() -> Error {
+  range_error("Illegal input >= 0x80 (not a basic code point)")
+}
+
+/// Maps a domain by applying the given function to each `.`-separated label
+fn map_domain<E>(
+  domain: &str,
+  f: impl Fn(&str) -> Result<Cow<'_, str>, E>,
+) -> Result<String, E> {
+  let mut result = String::with_capacity(domain.len());
+  let mut domain = domain;
+
+  // if it's an email, leave the local part as is
+  let mut parts = domain.split('@');
+  if let (Some(local), Some(remaining)) = (parts.next(), parts.next()) {
+    result.push_str(local);
+    result.push('@');
+    domain = remaining;
+  }
+
+  // split into labels and map each one
+  for (i, label) in domain.split('.').enumerate() {
+    if i > 0 {
+      result.push('.');
+    }
+    result.push_str(&f(label)?);
+  }
+  Ok(result)
+}
+
+/// Maps a unicode domain to ascii by punycode encoding each label
+///
+/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation
+fn to_ascii(input: &str) -> Result<String, Error> {
+  if input.is_ascii() {
+    return Ok(input.into());
+  }
+
+  let mut result = String::with_capacity(input.len()); // at least as long as input
+
+  let rest = map_domain(input, |label| {
+    if label.is_ascii() {
+      Ok(label.into())
+    } else {
+      idna::punycode::encode_str(label)
+        .map(|encoded| [PUNY_PREFIX, &encoded].join("").into()) // add the prefix
+        .ok_or_else(|| {
+          Error::msg("Input would take more than 63 characters to encode") // only error possible per the docs
+        })
+    }
+  })?;
+
+  result.push_str(&rest);
+  Ok(result)
+}
+
+/// Maps an ascii domain to unicode by punycode decoding each label
+///
+/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation
+fn to_unicode(input: &str) -> Result<String, Error> {
+  map_domain(input, |s| {
+    if let Some(puny) = s.strip_prefix(PUNY_PREFIX) {
+      // it's a punycode encoded label
+      Ok(
+        idna::punycode::decode_to_string(&puny.to_lowercase())
+          .ok_or_else(invalid_input_err)?
+          .into(),
+      )
+    } else {
+      Ok(s.into())
+    }
+  })
+}
+
+/// Converts a domain to ASCII with behavior that is
+/// compatible with the `punycode` module in node.js
+#[op2]
+#[string]
+pub fn op_node_idna_punycode_to_ascii(
+  #[string] domain: String,
+) -> Result<String, Error> {
+  to_ascii(&domain)
+}
+
+/// Converts a domain to Unicode with behavior that is
+/// compatible with the `punycode` module in node.js
+#[op2]
+#[string]
+pub fn op_node_idna_punycode_to_unicode(
+  #[string] domain: String,
+) -> Result<String, Error> {
+  to_unicode(&domain)
+}
+
+/// Converts a domain to ASCII as per the IDNA spec
+/// (specifically UTS #46)
 #[op2]
 #[string]
 pub fn op_node_idna_domain_to_ascii(
   #[string] domain: String,
-) -> Result<String, AnyError> {
-  Ok(idna::domain_to_ascii(&domain)?)
+) -> Result<String, Error> {
+  idna::domain_to_ascii(&domain).map_err(|e| e.into())
 }
 
+/// Converts a domain to Unicode as per the IDNA spec
+/// (specifically UTS #46)
 #[op2]
 #[string]
 pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String {
@@ -19,8 +129,29 @@ pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String {
 
 #[op2]
 #[string]
-pub fn op_node_idna_punycode_decode(#[string] domain: String) -> String {
-  idna::punycode::decode_to_string(&domain).unwrap_or_default()
+pub fn op_node_idna_punycode_decode(
+  #[string] domain: String,
+) -> Result<String, Error> {
+  if domain.is_empty() {
+    return Ok(domain);
+  }
+
+  // all code points before the last delimiter must be basic
+  // see https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js#L215-L227
+  let last_dash = domain.len()
+    - 1
+    - domain
+      .bytes()
+      .rev()
+      .position(|b| b == b'-')
+      .unwrap_or(domain.len() - 1);
+
+  if !domain[..last_dash].is_ascii() {
+    return Err(not_basic_err());
+  }
+
+  idna::punycode::decode_to_string(&domain)
+    .ok_or_else(|| deno_core::error::range_error("Invalid input"))
 }
 
 #[op2]
author	Nathan Whitaker <17734409+nathanwhit@users.noreply.github.com>	2024-03-11 15:49:43 -0700
committer	GitHub <noreply@github.com>	2024-03-11 15:49:43 -0700
commit	a77b2987bc90879af30a39ba274df9061cc7fbae (patch)
tree	ad7463374e66eb3aa61e41d96c512e67e717e349 /ext/node/ops/idna.rs
parent	d69aab62b0789dd54b8c09b54af022a38f060b5b (diff)