summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorNathan Whitaker <17734409+nathanwhit@users.noreply.github.com>2024-03-11 15:49:43 -0700
committerGitHub <noreply@github.com>2024-03-11 15:49:43 -0700
commita77b2987bc90879af30a39ba274df9061cc7fbae (patch)
treead7463374e66eb3aa61e41d96c512e67e717e349 /ext
parentd69aab62b0789dd54b8c09b54af022a38f060b5b (diff)
fix(ext/node): Match punycode module behavior to node (#22847)
Fixes #19214. We were using the `idna` crate to implement our polyfill for `punycode.toASCII` and `punycode.toUnicode`. The `idna` crate is correct, and adheres to the IDNA2003/2008 spec, but it turns out `node`'s implementations don't really follow any spec! Instead, node splits the domain by `'.'` and punycode encodes/decodes each part. This means that node's implementations will happily work on codepoints that are disallowed by the IDNA specs, causing the error in #19214. While fixing this, I went ahead and matched the node behavior on all of the punycode functions and enabled node's punycode test in our `node_compat` suite.
Diffstat (limited to 'ext')
-rw-r--r--ext/node/lib.rs2
-rw-r--r--ext/node/ops/idna.rs141
-rw-r--r--ext/node/polyfills/dns.ts12
-rw-r--r--ext/node/polyfills/internal/idna.ts19
-rw-r--r--ext/node/polyfills/punycode.ts24
-rw-r--r--ext/node/polyfills/url.ts13
6 files changed, 192 insertions, 19 deletions
diff --git a/ext/node/lib.rs b/ext/node/lib.rs
index 6d5a21ace..f9553a038 100644
--- a/ext/node/lib.rs
+++ b/ext/node/lib.rs
@@ -261,6 +261,8 @@ deno_core::extension!(deno_node,
ops::v8::op_vm_run_in_new_context,
ops::idna::op_node_idna_domain_to_ascii,
ops::idna::op_node_idna_domain_to_unicode,
+ ops::idna::op_node_idna_punycode_to_ascii,
+ ops::idna::op_node_idna_punycode_to_unicode,
ops::idna::op_node_idna_punycode_decode,
ops::idna::op_node_idna_punycode_encode,
ops::zlib::op_zlib_new,
diff --git a/ext/node/ops/idna.rs b/ext/node/ops/idna.rs
index 884e812cc..9c9450c70 100644
--- a/ext/node/ops/idna.rs
+++ b/ext/node/ops/idna.rs
@@ -1,16 +1,126 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
-use deno_core::error::AnyError;
+use deno_core::anyhow::Error;
+use deno_core::error::range_error;
use deno_core::op2;
+use std::borrow::Cow;
+
+// map_domain, to_ascii and to_unicode are based on the punycode implementation in node.js
+// https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js
+
+const PUNY_PREFIX: &str = "xn--";
+
+fn invalid_input_err() -> Error {
+ range_error("Invalid input")
+}
+
+fn not_basic_err() -> Error {
+ range_error("Illegal input >= 0x80 (not a basic code point)")
+}
+
+/// map a domain by mapping each label with the given function
+fn map_domain<E>(
+ domain: &str,
+ f: impl Fn(&str) -> Result<Cow<'_, str>, E>,
+) -> Result<String, E> {
+ let mut result = String::with_capacity(domain.len());
+ let mut domain = domain;
+
+ // if it's an email, leave the local part as is
+ let mut parts = domain.split('@');
+ if let (Some(local), Some(remaining)) = (parts.next(), parts.next()) {
+ result.push_str(local);
+ result.push('@');
+ domain = remaining;
+ }
+
+ // split into labels and map each one
+ for (i, label) in domain.split('.').enumerate() {
+ if i > 0 {
+ result.push('.');
+ }
+ result.push_str(&f(label)?);
+ }
+ Ok(result)
+}
+
+/// Maps a unicode domain to ascii by punycode encoding each label
+///
+/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation
+fn to_ascii(input: &str) -> Result<String, Error> {
+ if input.is_ascii() {
+ return Ok(input.into());
+ }
+
+ let mut result = String::with_capacity(input.len()); // at least as long as input
+
+ let rest = map_domain(input, |label| {
+ if label.is_ascii() {
+ Ok(label.into())
+ } else {
+ idna::punycode::encode_str(label)
+ .map(|encoded| [PUNY_PREFIX, &encoded].join("").into()) // add the prefix
+ .ok_or_else(|| {
+ Error::msg("Input would take more than 63 characters to encode") // only error possible per the docs
+ })
+ }
+ })?;
+
+ result.push_str(&rest);
+ Ok(result)
+}
+
+/// Maps an ascii domain to unicode by punycode decoding each label
+///
+/// Note this is not IDNA2003 or IDNA2008 compliant, rather it matches node.js's punycode implementation
+fn to_unicode(input: &str) -> Result<String, Error> {
+ map_domain(input, |s| {
+ if let Some(puny) = s.strip_prefix(PUNY_PREFIX) {
+ // it's a punycode encoded label
+ Ok(
+ idna::punycode::decode_to_string(&puny.to_lowercase())
+ .ok_or_else(invalid_input_err)?
+ .into(),
+ )
+ } else {
+ Ok(s.into())
+ }
+ })
+}
+
+/// Converts a domain to unicode with behavior that is
+/// compatible with the `punycode` module in node.js
+#[op2]
+#[string]
+pub fn op_node_idna_punycode_to_ascii(
+ #[string] domain: String,
+) -> Result<String, Error> {
+ to_ascii(&domain)
+}
+
+/// Converts a domain to ASCII with behavior that is
+/// compatible with the `punycode` module in node.js
+#[op2]
+#[string]
+pub fn op_node_idna_punycode_to_unicode(
+ #[string] domain: String,
+) -> Result<String, Error> {
+ to_unicode(&domain)
+}
+
+/// Converts a domain to ASCII as per the IDNA spec
+/// (specifically UTS #46)
#[op2]
#[string]
pub fn op_node_idna_domain_to_ascii(
#[string] domain: String,
-) -> Result<String, AnyError> {
- Ok(idna::domain_to_ascii(&domain)?)
+) -> Result<String, Error> {
+ idna::domain_to_ascii(&domain).map_err(|e| e.into())
}
+/// Converts a domain to Unicode as per the IDNA spec
+/// (specifically UTS #46)
#[op2]
#[string]
pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String {
@@ -19,8 +129,29 @@ pub fn op_node_idna_domain_to_unicode(#[string] domain: String) -> String {
#[op2]
#[string]
-pub fn op_node_idna_punycode_decode(#[string] domain: String) -> String {
- idna::punycode::decode_to_string(&domain).unwrap_or_default()
+pub fn op_node_idna_punycode_decode(
+ #[string] domain: String,
+) -> Result<String, Error> {
+ if domain.is_empty() {
+ return Ok(domain);
+ }
+
+ // all code points before the last delimiter must be basic
+ // see https://github.com/nodejs/node/blob/73025c4dec042e344eeea7912ed39f7b7c4a3991/lib/punycode.js#L215-L227
+ let last_dash = domain.len()
+ - 1
+ - domain
+ .bytes()
+ .rev()
+ .position(|b| b == b'-')
+ .unwrap_or(domain.len() - 1);
+
+ if !domain[..last_dash].is_ascii() {
+ return Err(not_basic_err());
+ }
+
+ idna::punycode::decode_to_string(&domain)
+ .ok_or_else(|| deno_core::error::range_error("Invalid input"))
}
#[op2]
diff --git a/ext/node/polyfills/dns.ts b/ext/node/polyfills/dns.ts
index 3b3565cb3..78b934e60 100644
--- a/ext/node/polyfills/dns.ts
+++ b/ext/node/polyfills/dns.ts
@@ -92,7 +92,7 @@ import {
GetAddrInfoReqWrap,
QueryReqWrap,
} from "ext:deno_node/internal_binding/cares_wrap.ts";
-import { toASCII } from "node:punycode";
+import { domainToASCII } from "ext:deno_node/internal/idna.ts";
import { notImplemented } from "ext:deno_node/_utils.ts";
function onlookup(
@@ -264,7 +264,13 @@ export function lookup(
req.hostname = hostname;
req.oncomplete = all ? onlookupall : onlookup;
- const err = getaddrinfo(req, toASCII(hostname), family, hints, verbatim);
+ const err = getaddrinfo(
+ req,
+ domainToASCII(hostname),
+ family,
+ hints,
+ verbatim,
+ );
if (err) {
nextTick(
@@ -332,7 +338,7 @@ function resolver(bindingName: keyof ChannelWrapQuery) {
req.ttl = !!(options && (options as ResolveOptions).ttl);
- const err = this._handle[bindingName](req, toASCII(name));
+ const err = this._handle[bindingName](req, domainToASCII(name));
if (err) {
throw dnsException(err, bindingName, name);
diff --git a/ext/node/polyfills/internal/idna.ts b/ext/node/polyfills/internal/idna.ts
index 6484fe951..93ed065cc 100644
--- a/ext/node/polyfills/internal/idna.ts
+++ b/ext/node/polyfills/internal/idna.ts
@@ -51,6 +51,11 @@
"use strict";
+import {
+ op_node_idna_domain_to_ascii,
+ op_node_idna_domain_to_unicode,
+} from "ext:core/ops";
+
/**
* Creates an array containing the numeric code points of each Unicode
* character in the string. While JavaScript uses UCS-2 internally,
@@ -105,3 +110,17 @@ export const ucs2 = {
decode: ucs2decode,
encode: ucs2encode,
};
+
+/**
+ * Converts a domain to ASCII as per the IDNA spec
+ */
+export function domainToASCII(domain: string) {
+ return op_node_idna_domain_to_ascii(domain);
+}
+
+/**
+ * Converts a domain to Unicode as per the IDNA spec
+ */
+export function domainToUnicode(domain: string) {
+ return op_node_idna_domain_to_unicode(domain);
+}
diff --git a/ext/node/polyfills/punycode.ts b/ext/node/polyfills/punycode.ts
index 6f137d31f..e89be15a2 100644
--- a/ext/node/polyfills/punycode.ts
+++ b/ext/node/polyfills/punycode.ts
@@ -1,28 +1,40 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
import {
- op_node_idna_domain_to_ascii,
- op_node_idna_domain_to_unicode,
op_node_idna_punycode_decode,
op_node_idna_punycode_encode,
+ op_node_idna_punycode_to_ascii,
+ op_node_idna_punycode_to_unicode,
} from "ext:core/ops";
+import { deprecate } from "node:util";
+
import { ucs2 } from "ext:deno_node/internal/idna.ts";
+// deno-lint-ignore no-explicit-any
+function punyDeprecated(fn: any) {
+ return deprecate(
+ fn,
+ "The `punycode` module is deprecated. Please use a userland " +
+ "alternative instead.",
+ "DEP0040",
+ );
+}
+
function toASCII(domain) {
- return op_node_idna_domain_to_ascii(domain);
+ return punyDeprecated(op_node_idna_punycode_to_ascii)(domain);
}
function toUnicode(domain) {
- return op_node_idna_domain_to_unicode(domain);
+ return punyDeprecated(op_node_idna_punycode_to_unicode)(domain);
}
function decode(domain) {
- return op_node_idna_punycode_decode(domain);
+ return punyDeprecated(op_node_idna_punycode_decode)(domain);
}
function encode(domain) {
- return op_node_idna_punycode_encode(domain);
+ return punyDeprecated(op_node_idna_punycode_encode)(domain);
}
export { decode, encode, toASCII, toUnicode, ucs2 };
diff --git a/ext/node/polyfills/url.ts b/ext/node/polyfills/url.ts
index 14195d146..6633334ba 100644
--- a/ext/node/polyfills/url.ts
+++ b/ext/node/polyfills/url.ts
@@ -70,7 +70,10 @@ import {
CHAR_ZERO_WIDTH_NOBREAK_SPACE,
} from "ext:deno_node/path/_constants.ts";
import * as path from "node:path";
-import { toASCII, toUnicode } from "node:punycode";
+import {
+ domainToASCII as idnaToASCII,
+ domainToUnicode as idnaToUnicode,
+} from "ext:deno_node/internal/idna.ts";
import { isWindows, osType } from "ext:deno_node/_util/os.ts";
import { encodeStr, hexTable } from "ext:deno_node/internal/querystring.ts";
import querystring from "node:querystring";
@@ -813,7 +816,7 @@ export class Url {
// Use lenient mode (`true`) to try to support even non-compliant
// URLs.
- this.hostname = toASCII(this.hostname);
+ this.hostname = idnaToASCII(this.hostname);
// Prevent two potential routes of hostname spoofing.
// 1. If this.hostname is empty, it must have become empty due to toASCII
@@ -1251,7 +1254,7 @@ export function resolveObject(source: string | Url, relative: string) {
* @see https://www.rfc-editor.org/rfc/rfc3490#section-4
*/
export function domainToASCII(domain: string) {
- return toASCII(domain);
+ return idnaToASCII(domain);
}
/**
@@ -1261,7 +1264,7 @@ export function domainToASCII(domain: string) {
* @see https://www.rfc-editor.org/rfc/rfc3490#section-4
*/
export function domainToUnicode(domain: string) {
- return toUnicode(domain);
+ return idnaToUnicode(domain);
}
/**
@@ -1396,7 +1399,7 @@ export function pathToFileURL(filepath: string): URL {
);
}
- outURL.hostname = domainToASCII(hostname);
+ outURL.hostname = idnaToASCII(hostname);
outURL.pathname = encodePathChars(paths.slice(3).join("/"));
} else {
let resolved = path.resolve(filepath);