diff options
author | Andreu Botella <andreu@andreubotella.com> | 2023-05-16 17:49:35 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-16 17:49:35 +0200 |
commit | 9ba2c4c42fcbadea1f19c67c88b5cbc4c97804f3 (patch) | |
tree | 4cd2d0eb702251736af4ca6b9c86bc49ac4a5bf8 | |
parent | 1c74b41855b85c9ec2ee1d83ac0f6b04e1461788 (diff) |
fix(fetch): Correctly decode `multipart/form-data` names and filenames (#19145)
Currently the `multipart/form-data` parser in
`Request.prototype.formData` and `Response.prototype.formData` decodes
non-ASCII filenames incorrectly, as if they were encoded in Latin-1
rather than UTF-8. This happens because the header section of each
`multipart/form-data` entry is decoded as Latin-1 in order to be parsed
with `Headers`, which only allows `ByteString`s, but the names and
filenames are never decoded correctly. This PR fixes this as a
post-processing step.
Note that the `multipart/form-data` parsing for these APIs in the Fetch
spec is very much underspecified, and it does not specify that names and
filenames must be decoded as UTF-8. However, it does require that the
bodies of non-`File` entries are decoded as UTF-8, and in browsers,
names and filenames always use the same encoding as the body.
Closes #19142.
-rw-r--r-- | cli/tests/unit/body_test.ts | 53 | ||||
-rw-r--r-- | ext/fetch/21_formdata.js | 33 |
2 files changed, 81 insertions, 5 deletions
diff --git a/cli/tests/unit/body_test.ts b/cli/tests/unit/body_test.ts index e7a38b7a6..8aebfadd3 100644 --- a/cli/tests/unit/body_test.ts +++ b/cli/tests/unit/body_test.ts @@ -53,6 +53,59 @@ Deno.test( }, ); +// FormData: non-ASCII names and filenames +Deno.test( + { permissions: { net: true } }, + async function bodyMultipartFormDataNonAsciiNames() { + const boundary = "----01230123"; + const payload = [ + `--${boundary}`, + `Content-Disposition: form-data; name="文字"`, + "", + "文字", + `--${boundary}`, + `Content-Disposition: form-data; name="file"; filename="文字"`, + "Content-Type: application/octet-stream", + "", + "", + `--${boundary}--`, + ].join("\r\n"); + + const body = buildBody( + new TextEncoder().encode(payload), + new Headers({ + "Content-Type": `multipart/form-data; boundary=${boundary}`, + }), + ); + + const formData = await body.formData(); + assert(formData.has("文字")); + assertEquals(formData.get("文字"), "文字"); + assert(formData.has("file")); + assert(formData.get("file") instanceof File); + assertEquals((formData.get("file") as File).name, "文字"); + }, +); + +// FormData: non-ASCII names and filenames roundtrip +Deno.test( + { permissions: { net: true } }, + async function bodyMultipartFormDataNonAsciiRoundtrip() { + const inFormData = new FormData(); + inFormData.append("文字", "文字"); + inFormData.append("file", new File([], "文字")); + + const body = buildBody(inFormData); + + const formData = await body.formData(); + assert(formData.has("文字")); + assertEquals(formData.get("文字"), "文字"); + assert(formData.has("file")); + assert(formData.get("file") instanceof File); + assertEquals((formData.get("file") as File).name, "文字"); + }, +); + Deno.test( { permissions: { net: true } }, async function bodyURLEncodedFormData() { diff --git a/ext/fetch/21_formdata.js b/ext/fetch/21_formdata.js index 330ed92e6..1ddd5f656 100644 --- a/ext/fetch/21_formdata.js +++ b/ext/fetch/21_formdata.js @@ -41,6 +41,7 @@ const { StringPrototypeReplaceAll, TypeError, 
TypedArrayPrototypeSubarray, + Uint8Array, } = primordials; const entryList = Symbol("entry list"); @@ -358,6 +359,20 @@ function parseContentDisposition(value) { return params; } +/** + * Decodes a string containing UTF-8 mistakenly decoded as Latin-1 and + * decodes it correctly. + * @param {string} latin1String + * @returns {string} + */ +function decodeLatin1StringAsUtf8(latin1String) { + const buffer = new Uint8Array(latin1String.length); + for (let i = 0; i < latin1String.length; i++) { + buffer[i] = latin1String.charCodeAt(i); + } + return core.decode(buffer); +} + const CRLF = "\r\n"; const LF = StringPrototypeCodePointAt(CRLF, 1); const CR = StringPrototypeCodePointAt(CRLF, 0); @@ -465,23 +480,31 @@ class MultipartParser { i - boundaryIndex - 1, ); // https://fetch.spec.whatwg.org/#ref-for-dom-body-formdata - const filename = MapPrototypeGet(disposition, "filename"); - const name = MapPrototypeGet(disposition, "name"); + // These are UTF-8 decoded as if it was Latin-1. + // TODO(@andreubotella): Maybe we shouldn't be parsing entry headers + // as Latin-1. + const latin1Filename = MapPrototypeGet(disposition, "filename"); + const latin1Name = MapPrototypeGet(disposition, "name"); state = 5; // Reset boundaryIndex = 0; headerText = ""; - if (!name) { + if (!latin1Name) { continue; // Skip, unknown name } - if (filename) { + const name = decodeLatin1StringAsUtf8(latin1Name); + if (latin1Filename) { const blob = new Blob([content], { type: headers.get("Content-Type") || "application/octet-stream", }); - formData.append(name, blob, filename); + formData.append( + name, + blob, + decodeLatin1StringAsUtf8(latin1Filename), + ); } else { formData.append(name, core.decode(content)); } |