diff options
author | Nathan Whitaker <17734409+nathanwhit@users.noreply.github.com> | 2024-03-11 15:49:43 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-11 15:49:43 -0700 |
commit | a77b2987bc90879af30a39ba274df9061cc7fbae (patch) | |
tree | ad7463374e66eb3aa61e41d96c512e67e717e349 /ext/node | |
parent | d69aab62b0789dd54b8c09b54af022a38f060b5b (diff) |
fix(ext/node): Match punycode module behavior to node (#22847)
Fixes #19214.
We were using the `idna` crate to implement our polyfill for
`punycode.toASCII` and `punycode.toUnicode`. The `idna` crate is
correct, and adheres to the IDNA2003/2008 spec, but it turns out
`node`'s implementations don't really follow any spec! Instead, node
splits the domain by `'.'` and punycode encodes/decodes each part. This
means that node's implementations will happily work on codepoints that
are disallowed by the IDNA specs, causing the error in #19214.
While fixing this, I went ahead and matched the node behavior on all of
the punycode functions and enabled node's punycode test in our
`node_compat` suite.
Diffstat (limited to 'ext/node')
-rw-r--r-- | ext/node/lib.rs | 2 | ||||
-rw-r--r-- | ext/node/ops/idna.rs | 141 | ||||
-rw-r--r-- | ext/node/polyfills/dns.ts | 12 | ||||
-rw-r--r-- | ext/node/polyfills/internal/idna.ts | 19 | ||||
-rw-r--r-- | ext/node/polyfills/punycode.ts | 24 | ||||
-rw-r--r-- | ext/node/polyfills/url.ts | 13 |
6 files changed, 192 insertions, 19 deletions
diff --git a/ext/node/lib.rs b/ext/node/lib.rs index 6d5a21ace..f9553a038 100644 --- a/ext/node/lib.rs +++ b/ext/node/lib.rs @@ -261,6 +261,8 @@ deno_core::extension!(deno_node, ops::v8::op_vm_run_in_new_context, ops::idna::op_node_idna_domain_to_ascii, ops::idna::op_node_idna_domain_to_unicode, + ops::idna::op_node_idna_punycode_to_ascii, + ops::idna::op_node_idna_punycode_to_unicode, ops::idna::op_node_idna_punycode_decode, ops::idna::op_node_idna_punycode_encode, ops::zlib::op_zlib_new, diff --git a/ext/node/ops/idna.rs b/ext/node/ops/idna.rs index 884e812cc..9c9450c70 100644 --- a/ext/node/ops/idna.rs +++ b/ext/node/ops/idna.rs @@ -1,16 +1,126 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. -use deno_core::error::AnyError; +use deno_core::anyhow::Error; +use deno_core::error::range_error; use deno_core::op2; +use std::borrow::Cow; + +// map_domain, to_ascii and to_unicode are based on the punycode implementation in node.js +// https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js + +const PUNY_PREFIX: &str = "xn--"; + +fn invalid_input_err() -> Error { + range_error("Invalid input") +} + +fn not_basic_err() -> Error { + range_error("Illegal input >= 0x80 (not a basic code point)") +} + +/// map a domain by mapping each label with the given function +fn map_domain<E>( + domain: &str, + f: impl Fn(&str) -> Result<Cow<'_, str>, E>, +) -> Result<String, E> { + let mut result = String::with_capacity(domain.len()); + let mut domain = domain; + + // if it's an email, leave the local part as is + let mut parts = domain.split('@'); + if let (Some(local), Some(remaining)) = (parts.next(), parts.next()) { + result.push_str(local); + result.push('@'); + domain = remaining; + } + + // split into labels and map each one + for (i, label) in domain.split('.').enumerate() { + if i > 0 { + result.push('.'); + } + result.push_str(&f(label)?); + } + Ok(result) +} + +/// Maps a unicode domain to ascii by punycode 
encoding each label +/// +/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation +fn to_ascii(input: &str) -> Result<String, Error> { + if input.is_ascii() { + return Ok(input.into()); + } + + let mut result = String::with_capacity(input.len()); // at least as long as input + + let rest = map_domain(input, |label| { + if label.is_ascii() { + Ok(label.into()) + } else { + idna::punycode::encode_str(label) + .map(|encoded| [PUNY_PREFIX, &encoded].join("").into()) // add the prefix + .ok_or_else(|| { + Error::msg("Input would take more than 63 characters to encode") // only error possible per the docs + }) + } + })?; + + result.push_str(&rest); + Ok(result) +} + +/// Maps an ascii domain to unicode by punycode decoding each label +/// +/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation +fn to_unicode(input: &str) -> Result<String, Error> { + map_domain(input, |s| { + if let Some(puny) = s.strip_prefix(PUNY_PREFIX) { + // it's a punycode encoded label + Ok( + idna::punycode::decode_to_string(&puny.to_lowercase()) + .ok_or_else(invalid_input_err)? + .into(), + ) + } else { + Ok(s.into()) + } + }) +} + +/// Converts a domain to ASCII with behavior that is +/// compatible with the `punycode` module in node.js +#[op2] +#[string] +pub fn op_node_idna_punycode_to_ascii( + #[string] domain: String, +) -> Result<String, Error> { + to_ascii(&domain) +} + +/// Converts a domain to unicode with behavior that is +/// compatible with the `punycode` module in node.js +#[op2] +#[string] +pub fn op_node_idna_punycode_to_unicode( + #[string] domain: String, +) -> Result<String, Error> { + to_unicode(&domain) +} + +/// Converts a domain to ASCII as per the IDNA spec +/// (specifically UTS #46) #[op2] #[string] pub fn op_node_idna_domain_to_ascii( #[string] domain: String, -) -> Result<String, AnyError> { - Ok(idna::domain_to_ascii(&domain)?) 
+) -> Result<String, Error> { + idna::domain_to_ascii(&domain).map_err(|e| e.into()) } +/// Converts a domain to Unicode as per the IDNA spec +/// (specifically UTS #46) #[op2] #[string] pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String { @@ -19,8 +129,29 @@ pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String { #[op2] #[string] -pub fn op_node_idna_punycode_decode(#[string] domain: String) -> String { - idna::punycode::decode_to_string(&domain).unwrap_or_default() +pub fn op_node_idna_punycode_decode( + #[string] domain: String, +) -> Result<String, Error> { + if domain.is_empty() { + return Ok(domain); + } + + // all code points before the last delimiter must be basic + // see https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js#L215-L227 + let last_dash = domain.len() + - 1 + - domain + .bytes() + .rev() + .position(|b| b == b'-') + .unwrap_or(domain.len() - 1); + + if !domain[..last_dash].is_ascii() { + return Err(not_basic_err()); + } + + idna::punycode::decode_to_string(&domain) + .ok_or_else(|| deno_core::error::range_error("Invalid input")) } #[op2] diff --git a/ext/node/polyfills/dns.ts b/ext/node/polyfills/dns.ts index 3b3565cb3..78b934e60 100644 --- a/ext/node/polyfills/dns.ts +++ b/ext/node/polyfills/dns.ts @@ -92,7 +92,7 @@ import { GetAddrInfoReqWrap, QueryReqWrap, } from "ext:deno_node/internal_binding/cares_wrap.ts"; -import { toASCII } from "node:punycode"; +import { domainToASCII } from "ext:deno_node/internal/idna.ts"; import { notImplemented } from "ext:deno_node/_utils.ts"; function onlookup( @@ -264,7 +264,13 @@ export function lookup( req.hostname = hostname; req.oncomplete = all ? 
onlookupall : onlookup; - const err = getaddrinfo(req, toASCII(hostname), family, hints, verbatim); + const err = getaddrinfo( + req, + domainToASCII(hostname), + family, + hints, + verbatim, + ); if (err) { nextTick( @@ -332,7 +338,7 @@ function resolver(bindingName: keyof ChannelWrapQuery) { req.ttl = !!(options && (options as ResolveOptions).ttl); - const err = this._handle[bindingName](req, toASCII(name)); + const err = this._handle[bindingName](req, domainToASCII(name)); if (err) { throw dnsException(err, bindingName, name); diff --git a/ext/node/polyfills/internal/idna.ts b/ext/node/polyfills/internal/idna.ts index 6484fe951..93ed065cc 100644 --- a/ext/node/polyfills/internal/idna.ts +++ b/ext/node/polyfills/internal/idna.ts @@ -51,6 +51,11 @@ "use strict"; +import { + op_node_idna_domain_to_ascii, + op_node_idna_domain_to_unicode, +} from "ext:core/ops"; + /** * Creates an array containing the numeric code points of each Unicode * character in the string. While JavaScript uses UCS-2 internally, @@ -105,3 +110,17 @@ export const ucs2 = { decode: ucs2decode, encode: ucs2encode, }; + +/** + * Converts a domain to ASCII as per the IDNA spec + */ +export function domainToASCII(domain: string) { + return op_node_idna_domain_to_ascii(domain); +} + +/** + * Converts a domain to Unicode as per the IDNA spec + */ +export function domainToUnicode(domain: string) { + return op_node_idna_domain_to_unicode(domain); +} diff --git a/ext/node/polyfills/punycode.ts b/ext/node/polyfills/punycode.ts index 6f137d31f..e89be15a2 100644 --- a/ext/node/polyfills/punycode.ts +++ b/ext/node/polyfills/punycode.ts @@ -1,28 +1,40 @@ // Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. 
import { - op_node_idna_domain_to_ascii, - op_node_idna_domain_to_unicode, op_node_idna_punycode_decode, op_node_idna_punycode_encode, + op_node_idna_punycode_to_ascii, + op_node_idna_punycode_to_unicode, } from "ext:core/ops"; +import { deprecate } from "node:util"; + import { ucs2 } from "ext:deno_node/internal/idna.ts"; +// deno-lint-ignore no-explicit-any +function punyDeprecated(fn: any) { + return deprecate( + fn, + "The `punycode` module is deprecated. Please use a userland " + + "alternative instead.", + "DEP0040", + ); +} + function toASCII(domain) { - return op_node_idna_domain_to_ascii(domain); + return punyDeprecated(op_node_idna_punycode_to_ascii)(domain); } function toUnicode(domain) { - return op_node_idna_domain_to_unicode(domain); + return punyDeprecated(op_node_idna_punycode_to_unicode)(domain); } function decode(domain) { - return op_node_idna_punycode_decode(domain); + return punyDeprecated(op_node_idna_punycode_decode)(domain); } function encode(domain) { - return op_node_idna_punycode_encode(domain); + return punyDeprecated(op_node_idna_punycode_encode)(domain); } export { decode, encode, toASCII, toUnicode, ucs2 }; diff --git a/ext/node/polyfills/url.ts b/ext/node/polyfills/url.ts index 14195d146..6633334ba 100644 --- a/ext/node/polyfills/url.ts +++ b/ext/node/polyfills/url.ts @@ -70,7 +70,10 @@ import { CHAR_ZERO_WIDTH_NOBREAK_SPACE, } from "ext:deno_node/path/_constants.ts"; import * as path from "node:path"; -import { toASCII, toUnicode } from "node:punycode"; +import { + domainToASCII as idnaToASCII, + domainToUnicode as idnaToUnicode, +} from "ext:deno_node/internal/idna.ts"; import { isWindows, osType } from "ext:deno_node/_util/os.ts"; import { encodeStr, hexTable } from "ext:deno_node/internal/querystring.ts"; import querystring from "node:querystring"; @@ -813,7 +816,7 @@ export class Url { // Use lenient mode (`true`) to try to support even non-compliant // URLs. 
- this.hostname = toASCII(this.hostname); + this.hostname = idnaToASCII(this.hostname); // Prevent two potential routes of hostname spoofing. // 1. If this.hostname is empty, it must have become empty due to toASCII @@ -1251,7 +1254,7 @@ export function resolveObject(source: string | Url, relative: string) { * @see https://www.rfc-editor.org/rfc/rfc3490#section-4 */ export function domainToASCII(domain: string) { - return toASCII(domain); + return idnaToASCII(domain); } /** @@ -1261,7 +1264,7 @@ export function domainToASCII(domain: string) { * @see https://www.rfc-editor.org/rfc/rfc3490#section-4 */ export function domainToUnicode(domain: string) { - return toUnicode(domain); + return idnaToUnicode(domain); } /** @@ -1396,7 +1399,7 @@ export function pathToFileURL(filepath: string): URL { ); } - outURL.hostname = domainToASCII(hostname); + outURL.hostname = idnaToASCII(hostname); outURL.pathname = encodePathChars(paths.slice(3).join("/")); } else { let resolved = path.resolve(filepath); |