diff options
author | Nathan Whitaker <17734409+nathanwhit@users.noreply.github.com> | 2024-03-11 15:49:43 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-11 15:49:43 -0700 |
commit | a77b2987bc90879af30a39ba274df9061cc7fbae (patch) | |
tree | ad7463374e66eb3aa61e41d96c512e67e717e349 /ext/node/ops/idna.rs | |
parent | d69aab62b0789dd54b8c09b54af022a38f060b5b (diff) |
fix(ext/node): Match punycode module behavior to node (#22847)
Fixes #19214.
We were using the `idna` crate to implement our polyfill for
`punycode.toASCII` and `punycode.toUnicode`. The `idna` crate is
correct and adheres to the IDNA2003/2008 specs, but it turns out that
`node`'s implementations don't really follow any spec! Instead, node
splits the domain on `'.'` and punycode-encodes/decodes each label. This
means that node's implementations will happily work on code points that
are disallowed by the IDNA specs, whereas our `idna`-based polyfill rejected them, causing the error in #19214.
While fixing this, I went ahead and matched the node behavior on all of
the punycode functions and enabled node's punycode test in our
`node_compat` suite.
Diffstat (limited to 'ext/node/ops/idna.rs')
-rw-r--r-- | ext/node/ops/idna.rs | 141 |
1 files changed, 136 insertions, 5 deletions
diff --git a/ext/node/ops/idna.rs b/ext/node/ops/idna.rs index 884e812cc..9c9450c70 100644 --- a/ext/node/ops/idna.rs +++ b/ext/node/ops/idna.rs @@ -1,16 +1,126 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use deno_core::error::AnyError; +use deno_core::anyhow::Error; +use deno_core::error::range_error; use deno_core::op2; +use std::borrow::Cow; + +// map_domain, to_ascii and to_unicode are based on the punycode implementation in node.js +// https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js + +const PUNY_PREFIX: &str = "xn--"; + +fn invalid_input_err() -> Error { + range_error("Invalid input") +} + +fn not_basic_err() -> Error { + range_error("Illegal input >= 0x80 (not a basic code point)") +} + +/// map a domain by mapping each label with the given function +fn map_domain<E>( + domain: &str, + f: impl Fn(&str) -> Result<Cow<'_, str>, E>, +) -> Result<String, E> { + let mut result = String::with_capacity(domain.len()); + let mut domain = domain; + + // if it's an email, leave the local part as is + let mut parts = domain.split('@'); + if let (Some(local), Some(remaining)) = (parts.next(), parts.next()) { + result.push_str(local); + result.push('@'); + domain = remaining; + } + + // split into labels and map each one + for (i, label) in domain.split('.').enumerate() { + if i > 0 { + result.push('.'); + } + result.push_str(&f(label)?); + } + Ok(result) +} + +/// Maps a unicode domain to ascii by punycode encoding each label +/// +/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation +fn to_ascii(input: &str) -> Result<String, Error> { + if input.is_ascii() { + return Ok(input.into()); + } + + let mut result = String::with_capacity(input.len()); // at least as long as input + + let rest = map_domain(input, |label| { + if label.is_ascii() { + Ok(label.into()) + } else { + idna::punycode::encode_str(label) + .map(|encoded| [PUNY_PREFIX, 
&encoded].join("").into()) // add the prefix + .ok_or_else(|| { + Error::msg("Input would take more than 63 characters to encode") // only error possible per the docs + }) + } + })?; + + result.push_str(&rest); + Ok(result) +} + +/// Maps an ascii domain to unicode by punycode decoding each label +/// +/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation +fn to_unicode(input: &str) -> Result<String, Error> { + map_domain(input, |s| { + if let Some(puny) = s.strip_prefix(PUNY_PREFIX) { + // it's a punycode encoded label + Ok( + idna::punycode::decode_to_string(&puny.to_lowercase()) + .ok_or_else(invalid_input_err)? + .into(), + ) + } else { + Ok(s.into()) + } + }) +} + +/// Converts a domain to unicode with behavior that is +/// compatible with the `punycode` module in node.js +#[op2] +#[string] +pub fn op_node_idna_punycode_to_ascii( + #[string] domain: String, +) -> Result<String, Error> { + to_ascii(&domain) +} + +/// Converts a domain to ASCII with behavior that is +/// compatible with the `punycode` module in node.js +#[op2] +#[string] +pub fn op_node_idna_punycode_to_unicode( + #[string] domain: String, +) -> Result<String, Error> { + to_unicode(&domain) +} + +/// Converts a domain to ASCII as per the IDNA spec +/// (specifically UTS #46) #[op2] #[string] pub fn op_node_idna_domain_to_ascii( #[string] domain: String, -) -> Result<String, AnyError> { - Ok(idna::domain_to_ascii(&domain)?) 
+) -> Result<String, Error> { + idna::domain_to_ascii(&domain).map_err(|e| e.into()) } +/// Converts a domain to Unicode as per the IDNA spec +/// (specifically UTS #46) #[op2] #[string] pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String { @@ -19,8 +129,29 @@ pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String { #[op2] #[string] -pub fn op_node_idna_punycode_decode(#[string] domain: String) -> String { - idna::punycode::decode_to_string(&domain).unwrap_or_default() +pub fn op_node_idna_punycode_decode( + #[string] domain: String, +) -> Result<String, Error> { + if domain.is_empty() { + return Ok(domain); + } + + // all code points before the last delimiter must be basic + // see https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js#L215-L227 + let last_dash = domain.len() + - 1 + - domain + .bytes() + .rev() + .position(|b| b == b'-') + .unwrap_or(domain.len() - 1); + + if !domain[..last_dash].is_ascii() { + return Err(not_basic_err()); + } + + idna::punycode::decode_to_string(&domain) + .ok_or_else(|| deno_core::error::range_error("Invalid input")) } #[op2] |