summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndreu Botella <andreu@andreubotella.com>2023-05-16 17:49:35 +0200
committerGitHub <noreply@github.com>2023-05-16 17:49:35 +0200
commit9ba2c4c42fcbadea1f19c67c88b5cbc4c97804f3 (patch)
tree4cd2d0eb702251736af4ca6b9c86bc49ac4a5bf8
parent1c74b41855b85c9ec2ee1d83ac0f6b04e1461788 (diff)
fix(fetch): Correctly decode `multipart/form-data` names and filenames (#19145)
Currently the `multipart/form-data` parser in `Request.prototype.formData` and `Response.prototype.formData` decodes non-ASCII filenames incorrectly, as if they were encoded in Latin-1 rather than UTF-8. This happens because the header section of each `multipart/form-data` entry is decoded as Latin-1 in order to be parsed with `Headers`, which only allows `ByteString`s, but the names and filenames are never decoded correctly. This PR fixes this as a post-processing step. Note that the `multipart/form-data` parsing for this APIs in the Fetch spec is very much underspecified, and it does not specify that names and filenames must be decoded as UTF-8. However, it does require that the bodies of non-`File` entries are decoded as UTF-8, and in browsers, names and filenames always use the same encoding as the body. Closes #19142.
-rw-r--r--cli/tests/unit/body_test.ts53
-rw-r--r--ext/fetch/21_formdata.js33
2 files changed, 81 insertions, 5 deletions
diff --git a/cli/tests/unit/body_test.ts b/cli/tests/unit/body_test.ts
index e7a38b7a6..8aebfadd3 100644
--- a/cli/tests/unit/body_test.ts
+++ b/cli/tests/unit/body_test.ts
@@ -53,6 +53,59 @@ Deno.test(
},
);
+// FormData: non-ASCII names and filenames
+Deno.test(
+ { permissions: { net: true } },
+ async function bodyMultipartFormDataNonAsciiNames() {
+ const boundary = "----01230123";
+ const payload = [
+ `--${boundary}`,
+ `Content-Disposition: form-data; name="文字"`,
+ "",
+ "文字",
+ `--${boundary}`,
+ `Content-Disposition: form-data; name="file"; filename="文字"`,
+ "Content-Type: application/octet-stream",
+ "",
+ "",
+ `--${boundary}--`,
+ ].join("\r\n");
+
+ const body = buildBody(
+ new TextEncoder().encode(payload),
+ new Headers({
+ "Content-Type": `multipart/form-data; boundary=${boundary}`,
+ }),
+ );
+
+ const formData = await body.formData();
+ assert(formData.has("文字"));
+ assertEquals(formData.get("文字"), "文字");
+ assert(formData.has("file"));
+ assert(formData.get("file") instanceof File);
+ assertEquals((formData.get("file") as File).name, "文字");
+ },
+);
+
+// FormData: non-ASCII names and filenames roundtrip
+Deno.test(
+ { permissions: { net: true } },
+ async function bodyMultipartFormDataNonAsciiRoundtrip() {
+ const inFormData = new FormData();
+ inFormData.append("文字", "文字");
+ inFormData.append("file", new File([], "文字"));
+
+ const body = buildBody(inFormData);
+
+ const formData = await body.formData();
+ assert(formData.has("文字"));
+ assertEquals(formData.get("文字"), "文字");
+ assert(formData.has("file"));
+ assert(formData.get("file") instanceof File);
+ assertEquals((formData.get("file") as File).name, "文字");
+ },
+);
+
Deno.test(
{ permissions: { net: true } },
async function bodyURLEncodedFormData() {
diff --git a/ext/fetch/21_formdata.js b/ext/fetch/21_formdata.js
index 330ed92e6..1ddd5f656 100644
--- a/ext/fetch/21_formdata.js
+++ b/ext/fetch/21_formdata.js
@@ -41,6 +41,7 @@ const {
StringPrototypeReplaceAll,
TypeError,
TypedArrayPrototypeSubarray,
+ Uint8Array,
} = primordials;
const entryList = Symbol("entry list");
@@ -358,6 +359,20 @@ function parseContentDisposition(value) {
return params;
}
+/**
+ * Decodes a string containing UTF-8 mistakenly decoded as Latin-1 and
+ * decodes it correctly.
+ * @param {string} latin1String
+ * @returns {string}
+ */
+function decodeLatin1StringAsUtf8(latin1String) {
+ const buffer = new Uint8Array(latin1String.length);
+ for (let i = 0; i < latin1String.length; i++) {
+ buffer[i] = latin1String.charCodeAt(i);
+ }
+ return core.decode(buffer);
+}
+
const CRLF = "\r\n";
const LF = StringPrototypeCodePointAt(CRLF, 1);
const CR = StringPrototypeCodePointAt(CRLF, 0);
@@ -465,23 +480,31 @@ class MultipartParser {
i - boundaryIndex - 1,
);
// https://fetch.spec.whatwg.org/#ref-for-dom-body-formdata
- const filename = MapPrototypeGet(disposition, "filename");
- const name = MapPrototypeGet(disposition, "name");
+ // These are UTF-8 decoded as if it was Latin-1.
+ // TODO(@andreubotella): Maybe we shouldn't be parsing entry headers
+ // as Latin-1.
+ const latin1Filename = MapPrototypeGet(disposition, "filename");
+ const latin1Name = MapPrototypeGet(disposition, "name");
state = 5;
// Reset
boundaryIndex = 0;
headerText = "";
- if (!name) {
+ if (!latin1Name) {
continue; // Skip, unknown name
}
- if (filename) {
+ const name = decodeLatin1StringAsUtf8(latin1Name);
+ if (latin1Filename) {
const blob = new Blob([content], {
type: headers.get("Content-Type") || "application/octet-stream",
});
- formData.append(name, blob, filename);
+ formData.append(
+ name,
+ blob,
+ decodeLatin1StringAsUtf8(latin1Filename),
+ );
} else {
formData.append(name, core.decode(content));
}