diff options
Diffstat (limited to 'cli')
-rw-r--r-- | cli/Cargo.toml | 1 | ||||
-rw-r--r-- | cli/js/ops/idna.ts | 12 | ||||
-rw-r--r-- | cli/js/web/url.ts | 97 | ||||
-rw-r--r-- | cli/ops/idna.rs | 43 | ||||
-rw-r--r-- | cli/ops/mod.rs | 1 | ||||
-rw-r--r-- | cli/tests/unit/url_test.ts | 52 | ||||
-rw-r--r-- | cli/web_worker.rs | 1 | ||||
-rw-r--r-- | cli/worker.rs | 1 |
8 files changed, 159 insertions, 49 deletions
diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 64688d5d0..b02f68825 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -33,6 +33,7 @@ dlopen = "0.1.8" dprint-plugin-typescript = "0.19.5" futures = { version = "0.3.5", features = ["compat", "io-compat"] } http = "0.2.1" +idna = "0.2.0" indexmap = "1.4.0" lazy_static = "1.4.0" libc = "0.2.71" diff --git a/cli/js/ops/idna.ts b/cli/js/ops/idna.ts new file mode 100644 index 000000000..8459ca29c --- /dev/null +++ b/cli/js/ops/idna.ts @@ -0,0 +1,12 @@ +// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. + +/** https://url.spec.whatwg.org/#idna */ + +import { sendSync } from "./dispatch_json.ts"; + +export function domainToAscii( + domain: string, + { beStrict = false }: { beStrict?: boolean } = {} +): string { + return sendSync("op_domain_to_ascii", { domain, beStrict }); +} diff --git a/cli/js/web/url.ts b/cli/js/web/url.ts index b12f6da75..e429fddbb 100644 --- a/cli/js/web/url.ts +++ b/cli/js/web/url.ts @@ -1,11 +1,14 @@ // Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. import { build } from "../build.ts"; import { getRandomValues } from "../ops/get_random_values.ts"; +import { domainToAscii } from "../ops/idna.ts"; import { customInspect } from "./console.ts"; +import { TextEncoder } from "./text_encoding.ts"; import { urls } from "./url_search_params.ts"; interface URLParts { protocol: string; + slashes: string; username: string; password: string; hostname: string; @@ -57,7 +60,9 @@ function parse(url: string, isBase = true): URLParts | undefined { if (isBase && parts.protocol == "") { return undefined; } + const isSpecial = specialSchemes.includes(parts.protocol); if (parts.protocol == "file") { + parts.slashes = "//"; parts.username = ""; parts.password = ""; [parts.hostname, restUrl] = takePattern(restUrl, /^[/\\]{2}([^/\\?#]*)/); @@ -68,7 +73,8 @@ function parse(url: string, isBase = true): URLParts | undefined { // equivalent to: `new URL("file://localhost/foo/bar")`. [parts.hostname, restUrl] = takePattern(restUrl, /^[/\\]{2,}([^/\\?#]*)/); } - } else if (specialSchemes.includes(parts.protocol)) { + } else if (isSpecial) { + parts.slashes = "//"; let restAuthority; [restAuthority, restUrl] = takePattern(restUrl, /^[/\\]{2,}([^/\\?#]+)/); if (isBase && restAuthority == "") { @@ -92,17 +98,18 @@ function parse(url: string, isBase = true): URLParts | undefined { return undefined; } } else { + [parts.slashes, restUrl] = takePattern(restUrl, /^([/\\]{2})/); parts.username = ""; parts.password = ""; - parts.hostname = ""; + if (parts.slashes) { + [parts.hostname, restUrl] = takePattern(restUrl, /^([^/\\?#]*)/); + } else { + parts.hostname = ""; + } parts.port = ""; } try { - const IPv6re = /^\[[0-9a-fA-F.:]{2,}\]$/; - if (!IPv6re.test(parts.hostname)) { - parts.hostname = encodeHostname(parts.hostname); // Non-IPv6 URLs - } - parts.hostname = parts.hostname.toLowerCase(); + parts.hostname = encodeHostname(parts.hostname, isSpecial); } catch { return undefined; } @@ -298,7 +305,8 @@ export class URLImpl implements URL { set hostname(value: string) { value = String(value); try { - parts.get(this)!.hostname = encodeHostname(value); + const isSpecial = specialSchemes.includes(parts.get(this)!.protocol); + parts.get(this)!.hostname = encodeHostname(value, isSpecial); } catch {} } @@ -307,11 +315,9 @@ export class URLImpl implements URL { this.username || this.password ? `${this.username}${this.password ? ":" + this.password : ""}@` : ""; - let slash = ""; - if (this.host || this.protocol === "file:") { - slash = "//"; - } - return `${this.protocol}${slash}${authentication}${this.host}${this.pathname}${this.search}${this.hash}`; + return `${this.protocol}${parts.get(this)!.slashes}${authentication}${ + this.host + }${this.pathname}${this.search}${this.hash}`; } set href(value: string) { @@ -427,6 +433,7 @@ export class URLImpl implements URL { } else if (baseParts) { parts.set(this, { protocol: baseParts.protocol, + slashes: baseParts.slashes, username: baseParts.username, password: baseParts.password, hostname: baseParts.hostname, @@ -479,7 +486,7 @@ export class URLImpl implements URL { } function charInC0ControlSet(c: string): boolean { - return c >= "\u0000" && c <= "\u001F"; + return (c >= "\u0000" && c <= "\u001F") || c > "\u007E"; } function charInSearchSet(c: string): boolean { @@ -503,20 +510,72 @@ function charInUserinfoSet(c: string): boolean { return charInPathSet(c) || ["\u0027", "\u002F", "\u003A", "\u003B", "\u003D", "\u0040", "\u005B", "\u005C", "\u005D", "\u005E", "\u007C"].includes(c); } +function charIsForbiddenInHost(c: string): boolean { + // prettier-ignore + return ["\u0000", "\u0009", "\u000A", "\u000D", "\u0020", "\u0023", "\u0025", "\u002F", "\u003A", "\u003C", "\u003E", "\u003F", "\u0040", "\u005B", "\u005C", "\u005D", "\u005E"].includes(c); +} + +const encoder = new TextEncoder(); + function encodeChar(c: string): string { - return `%${c.charCodeAt(0).toString(16)}`.toUpperCase(); + return [...encoder.encode(c)] + .map((n) => `%${n.toString(16)}`) + .join("") + .toUpperCase(); } function encodeUserinfo(s: string): string { return [...s].map((c) => (charInUserinfoSet(c) ? encodeChar(c) : c)).join(""); } -function encodeHostname(s: string): string { - // FIXME: https://url.spec.whatwg.org/#idna - if (s.includes(":")) { +function encodeHostname(s: string, isSpecial = true): string { + // IPv6 parsing. + if (s.startsWith("[") && s.endsWith("]")) { + if (!s.match(/^\[[0-9A-Fa-f.:]{2,}\]$/)) { + throw new TypeError("Invalid hostname."); + } + return s.toLowerCase(); + } + + let result = s; + + if (!isSpecial) { + // Check against forbidden host code points except for "%". + for (const c of result) { + if (charIsForbiddenInHost(c) && c != "\u0025") { + throw new TypeError("Invalid hostname."); + } + } + + // Percent-encode C0 control set. + result = [...result] + .map((c) => (charInC0ControlSet(c) ? encodeChar(c) : c)) + .join(""); + + return result; + } + + // Percent-decode. + if (result.match(/%(?![0-9A-Fa-f]{2})/) != null) { throw new TypeError("Invalid hostname."); } - return encodeURIComponent(s); + result = result.replace(/%(.{2})/g, (_, hex) => + String.fromCodePoint(Number(`0x${hex}`)) + ); + + // IDNA domain to ASCII. + result = domainToAscii(result); + + // Check against forbidden host code points. + for (const c of result) { + if (charIsForbiddenInHost(c)) { + throw new TypeError("Invalid hostname."); + } + } + + // TODO(nayeemrmn): IPv4 parsing. + + return result; } function encodePathname(s: string): string { diff --git a/cli/ops/idna.rs b/cli/ops/idna.rs new file mode 100644 index 000000000..8ecef4862 --- /dev/null +++ b/cli/ops/idna.rs @@ -0,0 +1,43 @@ +// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. + +//! https://url.spec.whatwg.org/#idna + +use super::dispatch_json::{Deserialize, JsonOp, Value}; +use crate::op_error::{ErrorKind, OpError}; +use crate::state::State; +use deno_core::CoreIsolate; +use deno_core::ZeroCopyBuf; +use idna::{domain_to_ascii, domain_to_ascii_strict}; + +pub fn init(i: &mut CoreIsolate, s: &State) { + i.register_op("op_domain_to_ascii", s.stateful_json_op(op_domain_to_ascii)); +} + +fn invalid_domain_error() -> OpError { + OpError { + kind: ErrorKind::TypeError, + msg: "Invalid domain.".to_string(), + } +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct DomainToAscii { + domain: String, + be_strict: bool, +} + +fn op_domain_to_ascii( + _state: &State, + args: Value, + _zero_copy: &mut [ZeroCopyBuf], +) -> Result<JsonOp, OpError> { + let args: DomainToAscii = serde_json::from_value(args)?; + let domain = if args.be_strict { + domain_to_ascii_strict(args.domain.as_str()) + .map_err(|_| invalid_domain_error())? + } else { + domain_to_ascii(args.domain.as_str()).map_err(|_| invalid_domain_error())? + }; + Ok(JsonOp::Sync(json!(domain))) +} diff --git a/cli/ops/mod.rs b/cli/ops/mod.rs index a53e5ac16..ef8c3bd0f 100644 --- a/cli/ops/mod.rs +++ b/cli/ops/mod.rs @@ -13,6 +13,7 @@ pub mod errors; pub mod fetch; pub mod fs; pub mod fs_events; +pub mod idna; pub mod io; pub mod net; #[cfg(unix)] diff --git a/cli/tests/unit/url_test.ts b/cli/tests/unit/url_test.ts index d22787f1c..177f605c1 100644 --- a/cli/tests/unit/url_test.ts +++ b/cli/tests/unit/url_test.ts @@ -25,32 +25,25 @@ unitTest(function urlParsing(): void { String(url), "https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat" ); - assertEquals( - JSON.stringify({ key: url }), - `{"key":"https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat"}` - ); +}); - // IPv6 type hostname. - const urlv6 = new URL( - "https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat" - ); - assertEquals(urlv6.origin, "https://[::1]:8000"); - assertEquals(urlv6.password, "bar"); - assertEquals(urlv6.pathname, "/qux/quux"); - assertEquals(urlv6.port, "8000"); - assertEquals(urlv6.protocol, "https:"); - assertEquals(urlv6.search, "?foo=bar&baz=12"); - assertEquals(urlv6.searchParams.getAll("foo"), ["bar"]); - assertEquals(urlv6.searchParams.getAll("baz"), ["12"]); - assertEquals(urlv6.username, "foo"); - assertEquals( - String(urlv6), - "https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat" - ); - assertEquals( - JSON.stringify({ key: urlv6 }), - `{"key":"https://foo:bar@[::1]:8000/qux/quux?foo=bar&baz=12#qat"}` - ); +unitTest(function urlHostParsing(): void { + // IPv6. + assertEquals(new URL("https://foo:bar@[::1]:8000").hostname, "[::1]"); + + // Forbidden host code point. + assertThrows(() => new URL("https:// a"), TypeError, "Invalid URL."); + assertThrows(() => new URL("abcde:// a"), TypeError, "Invalid URL."); + assertThrows(() => new URL("https://%"), TypeError, "Invalid URL."); + assertEquals(new URL("abcde://%").hostname, "%"); + + // Percent-decode. + assertEquals(new URL("https://%21").hostname, "!"); + assertEquals(new URL("abcde://%21").hostname, "%21"); + + // TODO(nayeemrmn): IPv4 parsing. + // assertEquals(new URL("https://260").hostname, "0.0.1.4"); + assertEquals(new URL("abcde://260").hostname, "260"); }); unitTest(function urlModifications(): void { @@ -208,6 +201,7 @@ unitTest(function urlUncHostname() { unitTest(function urlHostnameUpperCase() { assertEquals(new URL("https://EXAMPLE.COM").href, "https://example.com/"); + assertEquals(new URL("abcde://EXAMPLE.COM").href, "abcde://EXAMPLE.COM/"); }); unitTest(function urlTrim() { @@ -223,11 +217,9 @@ unitTest(function urlEncoding() { new URL("https://:a !$&*()=,;+'\"@example.com").password, "a%20!$&*()%3D,%3B+%27%22" ); - // FIXME: https://url.spec.whatwg.org/#idna - // assertEquals( - // new URL("https://a !$&*()=,+'\"").hostname, - // "a%20%21%24%26%2A%28%29%3D%2C+%27%22" - // ); + assertEquals(new URL("abcde://mañana/c?d#e").hostname, "ma%C3%B1ana"); + // https://url.spec.whatwg.org/#idna + assertEquals(new URL("https://mañana/c?d#e").hostname, "xn--maana-pta"); assertEquals( new URL("https://example.com/a ~!@$&*()=:/,;+'\"\\").pathname, "/a%20~!@$&*()=:/,;+'%22/" diff --git a/cli/web_worker.rs b/cli/web_worker.rs index de4cd91f9..9ac26fca0 100644 --- a/cli/web_worker.rs +++ b/cli/web_worker.rs @@ -121,6 +121,7 @@ impl WebWorker { handle, ); ops::worker_host::init(isolate, &state); + ops::idna::init(isolate, &state); ops::io::init(isolate, &state); ops::resources::init(isolate, &state); ops::errors::init(isolate, &state); diff --git a/cli/worker.rs b/cli/worker.rs index 08367da91..6076b1cb6 100644 --- a/cli/worker.rs +++ b/cli/worker.rs @@ -261,6 +261,7 @@ impl MainWorker { ops::fetch::init(isolate, &state); ops::fs::init(isolate, &state); ops::fs_events::init(isolate, &state); + ops::idna::init(isolate, &state); ops::io::init(isolate, &state); ops::plugin::init(isolate, &state); ops::net::init(isolate, &state); |