summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marcos Casagrande <marcoscvp90@gmail.com> 2020-06-01 14:32:08 +0200
committer: GitHub <noreply@github.com> 2020-06-01 14:32:08 +0200
commit: 1d3dce9a68c981aded31b4eb12f8a2ec4beecfab (patch)
tree: ea21bc746bab92715d5d9621edd272167f61451a
parent: edeeedf40161dcc4932a33139a7fffa1a73cc142 (diff)
fix(cli/js/web): formData parser for binary files (#6015)
-rw-r--r-- cli/js/web/body.ts 92
-rw-r--r-- cli/js/web/fetch/multipart.ts 120
-rw-r--r-- cli/tests/unit/fetch_test.ts 19
-rwxr-xr-x tools/http_server.py 19
4 files changed, 163 insertions, 87 deletions
diff --git a/cli/js/web/body.ts b/cli/js/web/body.ts
index ffe3f0e59..7a5b63d32 100644
--- a/cli/js/web/body.ts
+++ b/cli/js/web/body.ts
@@ -3,6 +3,7 @@ import * as encoding from "./text_encoding.ts";
import * as domTypes from "./dom_types.d.ts";
import { ReadableStreamImpl } from "./streams/readable_stream.ts";
import { getHeaderValueParams, hasHeaderValueOf } from "./util.ts";
+import { MultipartParser } from "./fetch/multipart.ts";
// only namespace imports work for now, plucking out what we need
const { TextEncoder, TextDecoder } = encoding;
@@ -130,98 +131,15 @@ export class Body implements domTypes.Body {
// ref: https://fetch.spec.whatwg.org/#body-mixin
public async formData(): Promise<FormData> {
const formData = new FormData();
- const enc = new TextEncoder();
if (hasHeaderValueOf(this.contentType, "multipart/form-data")) {
const params = getHeaderValueParams(this.contentType);
- if (!params.has("boundary")) {
- // TypeError is required by spec
- throw new TypeError("multipart/form-data must provide a boundary");
- }
+
// ref: https://tools.ietf.org/html/rfc2046#section-5.1
const boundary = params.get("boundary")!;
- const dashBoundary = `--${boundary}`;
- const delimiter = `\r\n${dashBoundary}`;
- const closeDelimiter = `${delimiter}--`;
-
- const body = await this.text();
- let bodyParts: string[];
- const bodyEpilogueSplit = body.split(closeDelimiter);
- if (bodyEpilogueSplit.length < 2) {
- bodyParts = [];
- } else {
- // discard epilogue
- const bodyEpilogueTrimmed = bodyEpilogueSplit[0];
- // first boundary treated special due to optional prefixed \r\n
- const firstBoundaryIndex = bodyEpilogueTrimmed.indexOf(dashBoundary);
- if (firstBoundaryIndex < 0) {
- throw new TypeError("Invalid boundary");
- }
- const bodyPreambleTrimmed = bodyEpilogueTrimmed
- .slice(firstBoundaryIndex + dashBoundary.length)
- .replace(/^[\s\r\n\t]+/, ""); // remove transport-padding CRLF
- // trimStart might not be available
- // Be careful! body-part allows trailing \r\n!
- // (as long as it is not part of `delimiter`)
- bodyParts = bodyPreambleTrimmed
- .split(delimiter)
- .map((s): string => s.replace(/^[\s\r\n\t]+/, ""));
- // TODO: LWSP definition is actually trickier,
- // but should be fine in our case since without headers
- // we should just discard the part
- }
- for (const bodyPart of bodyParts) {
- const headers = new Headers();
- const headerOctetSeperatorIndex = bodyPart.indexOf("\r\n\r\n");
- if (headerOctetSeperatorIndex < 0) {
- continue; // Skip unknown part
- }
- const headerText = bodyPart.slice(0, headerOctetSeperatorIndex);
- const octets = bodyPart.slice(headerOctetSeperatorIndex + 4);
+ const body = new Uint8Array(await this.arrayBuffer());
+ const multipartParser = new MultipartParser(body, boundary);
- // TODO: use textproto.readMIMEHeader from deno_std
- const rawHeaders = headerText.split("\r\n");
- for (const rawHeader of rawHeaders) {
- const sepIndex = rawHeader.indexOf(":");
- if (sepIndex < 0) {
- continue; // Skip this header
- }
- const key = rawHeader.slice(0, sepIndex);
- const value = rawHeader.slice(sepIndex + 1);
- headers.set(key, value);
- }
- if (!headers.has("content-disposition")) {
- continue; // Skip unknown part
- }
- // Content-Transfer-Encoding Deprecated
- const contentDisposition = headers.get("content-disposition")!;
- const partContentType = headers.get("content-type") || "text/plain";
- // TODO: custom charset encoding (needs TextEncoder support)
- // const contentTypeCharset =
- // getHeaderValueParams(partContentType).get("charset") || "";
- if (!hasHeaderValueOf(contentDisposition, "form-data")) {
- continue; // Skip, might not be form-data
- }
- const dispositionParams = getHeaderValueParams(contentDisposition);
- if (!dispositionParams.has("name")) {
- continue; // Skip, unknown name
- }
- const dispositionName = dispositionParams.get("name")!;
- if (dispositionParams.has("filename")) {
- const filename = dispositionParams.get("filename")!;
- const blob = new DenoBlob([enc.encode(octets)], {
- type: partContentType,
- });
- // TODO: based on spec
- // https://xhr.spec.whatwg.org/#dom-formdata-append
- // https://xhr.spec.whatwg.org/#create-an-entry
- // Currently it does not mention how I could pass content-type
- // to the internally created file object...
- formData.append(dispositionName, blob, filename);
- } else {
- formData.append(dispositionName, octets);
- }
- }
- return formData;
+ return multipartParser.parse();
} else if (
hasHeaderValueOf(this.contentType, "application/x-www-form-urlencoded")
) {
diff --git a/cli/js/web/fetch/multipart.ts b/cli/js/web/fetch/multipart.ts
new file mode 100644
index 000000000..792f9b5ee
--- /dev/null
+++ b/cli/js/web/fetch/multipart.ts
@@ -0,0 +1,120 @@
+// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
+
+import { DenoBlob } from "../blob.ts";
+import { TextEncoder, TextDecoder } from "../text_encoding.ts";
+import { getHeaderValueParams } from "../util.ts";
+
+// Codec instances created once at module load; `encoder` turns the boundary
+// into bytes and `decoder` turns non-file part contents into strings.
+const decoder = new TextDecoder();
+const encoder = new TextEncoder();
+// Byte values of carriage return and line feed, compared against raw body
+// bytes to detect CRLF line endings.
+const CR = "\r".charCodeAt(0);
+const LF = "\n".charCodeAt(0);
+
+/** Result of parsing the header section of a single multipart body part. */
+interface MultipartHeaders {
+  // Every `name: value` header line found in the part.
+  headers: Headers;
+  // Parameters extracted from the part's Content-Disposition header value
+  // (the parser reads `name` and `filename` from it).
+  disposition: Map<string, string>;
+}
+
+/**
+ * Byte-oriented parser for `multipart/form-data` payloads.
+ *
+ * Part contents are taken as sub-views of the input buffer, so binary file
+ * parts are never round-tripped through a string.
+ */
+export class MultipartParser {
+  /** Delimiter text: `--` followed by the boundary parameter. */
+  readonly boundary: string;
+  /** `boundary` encoded to bytes; this is what is searched for in `body`. */
+  readonly boundaryChars: Uint8Array;
+  /** The complete multipart payload. */
+  readonly body: Uint8Array;
+
+  /**
+   * @param body raw bytes of the multipart payload
+   * @param boundary value of the `boundary` parameter (without leading `--`)
+   * @throws TypeError when `boundary` is empty or missing
+   */
+  constructor(body: Uint8Array, boundary: string) {
+    if (!boundary) {
+      throw new TypeError("multipart/form-data must provide a boundary");
+    }
+
+    this.boundary = `--${boundary}`;
+    this.body = body;
+    this.boundaryChars = encoder.encode(this.boundary);
+  }
+
+  /**
+   * Splits the header section of one part into a `Headers` object and the
+   * parameters of its Content-Disposition header. Lines without a `:` are
+   * ignored.
+   */
+  #parseHeaders = (headersText: string): MultipartHeaders => {
+    const headers = new Headers();
+    const rawHeaders = headersText.split("\r\n");
+    for (const rawHeader of rawHeaders) {
+      const sepIndex = rawHeader.indexOf(":");
+      if (sepIndex < 0) {
+        continue; // Skip this header
+      }
+      const key = rawHeader.slice(0, sepIndex);
+      const value = rawHeader.slice(sepIndex + 1);
+      headers.set(key, value);
+    }
+
+    return {
+      headers,
+      disposition: getHeaderValueParams(
+        headers.get("Content-Disposition") ?? ""
+      ),
+    };
+  };
+
+  /**
+   * Parses `body` and returns its parts as a `FormData`.
+   *
+   * - The first line of the body is treated as the opening delimiter line.
+   * - A header section ends at the first empty line, however many header
+   *   lines (including none) precede it.
+   * - A delimiter is only recognised at the start of a line (right after
+   *   CRLF), so boundary-like bytes in the middle of a line stay part of
+   *   the content.
+   * - Parsing stops at the close delimiter (`--boundary--`).
+   * - Parts whose Content-Disposition has no `name` are skipped. Parts with
+   *   a `filename` are appended as blobs typed with the part's Content-Type
+   *   (default `application/octet-stream`); all other parts are decoded and
+   *   appended as strings.
+   */
+  parse(): FormData {
+    const formData = new FormData();
+    const { body, boundaryChars } = this;
+    const DASH = "-".charCodeAt(0);
+
+    // Parser states.
+    const DELIMITER_LINE = 0; // skipping the remainder of a delimiter line
+    const HEADERS = 1; // collecting header lines of the current part
+    const CONTENT = 2; // scanning part content for the next delimiter
+
+    let state = DELIMITER_LINE;
+    let headerText = "";
+    let lineStart = 0; // index of the first byte of the current header line
+    let fileStart = 0; // index of the first content byte of the current part
+    let boundaryIndex = 0; // number of delimiter bytes matched so far
+
+    for (let i = 0; i < body.length; i++) {
+      const byte = body[i];
+      const isNewLine = byte === LF && body[i - 1] === CR;
+
+      if (state === DELIMITER_LINE) {
+        // Ignore transport padding up to the CRLF ending the delimiter line.
+        if (isNewLine) {
+          state = HEADERS;
+          lineStart = i + 1;
+        }
+        continue;
+      }
+
+      if (state === HEADERS) {
+        // Bytes are mapped 1:1 to code units; header text is expected to be
+        // ASCII.
+        headerText += String.fromCharCode(byte);
+        if (isNewLine) {
+          if (i - 1 === lineStart) {
+            // The line holds nothing but CRLF: end of the header section.
+            state = CONTENT;
+            fileStart = i + 1;
+          } else {
+            lineStart = i + 1;
+          }
+        }
+        continue;
+      }
+
+      // state === CONTENT: look for the delimiter at the start of a line.
+      const atLineStart = body[i - 1] === LF && body[i - 2] === CR;
+      if (
+        byte === boundaryChars[boundaryIndex] &&
+        (boundaryIndex > 0 || atLineStart)
+      ) {
+        boundaryIndex++;
+      } else {
+        boundaryIndex = 0;
+      }
+
+      // Compare against the byte length, not the string length.
+      if (boundaryIndex < boundaryChars.length) {
+        continue;
+      }
+
+      const { headers, disposition } = this.#parseHeaders(headerText);
+      // `i` is the last delimiter byte; drop the delimiter and the CRLF that
+      // precedes it. Clamp so a delimiter directly after the headers yields
+      // empty content instead of a negative end index.
+      const contentEnd = Math.max(fileStart, i - boundaryChars.length - 1);
+      const content = body.subarray(fileStart, contentEnd);
+      // https://fetch.spec.whatwg.org/#ref-for-dom-body-formdata
+      const filename = disposition.get("filename");
+      const name = disposition.get("name");
+      const isCloseDelimiter = body[i + 1] === DASH && body[i + 2] === DASH;
+
+      // Reset for the next part.
+      state = DELIMITER_LINE;
+      boundaryIndex = 0;
+      headerText = "";
+
+      if (name) {
+        if (filename) {
+          const blob = new DenoBlob([content], {
+            type: headers.get("Content-Type") || "application/octet-stream",
+          });
+          formData.append(name, blob, filename);
+        } else {
+          formData.append(name, decoder.decode(content));
+        }
+      }
+
+      if (isCloseDelimiter) {
+        break; // Everything after the close delimiter is epilogue.
+      }
+    }
+
+    return formData;
+  }
+}
diff --git a/cli/tests/unit/fetch_test.ts b/cli/tests/unit/fetch_test.ts
index c1dde92a9..57414d652 100644
--- a/cli/tests/unit/fetch_test.ts
+++ b/cli/tests/unit/fetch_test.ts
@@ -217,6 +217,25 @@ unitTest(
);
unitTest(
+  { perms: { net: true } },
+  // Posts raw bytes to the multipart echo endpoint and checks that the file
+  // part of the multipart response comes back byte-for-byte identical.
+  async function fetchInitFormDataBinaryFileBody(): Promise<void> {
+    // Arbitrary bytes
+    // prettier-ignore
+    const payload = new Uint8Array([108,2,0,0,145,22,162,61,157,227,166,77,138,75,180,56,119,188,177,183]);
+    const requestInit = { method: "POST", body: payload };
+    const res = await fetch(
+      "http://localhost:4545/echo_multipart_file",
+      requestInit
+    );
+    const form = await res.formData();
+    const echoed = form.get("file") as File;
+
+    assertEquals(echoed.type, "application/octet-stream");
+    assertEquals(echoed.name, "file.bin");
+    const echoedBytes = new Uint8Array(await echoed.arrayBuffer());
+    assertEquals(echoedBytes, payload);
+  }
+);
+
+unitTest(
{
perms: { net: true },
},
diff --git a/tools/http_server.py b/tools/http_server.py
index 76f8e6e69..d143f0ba8 100755
--- a/tools/http_server.py
+++ b/tools/http_server.py
@@ -207,6 +207,25 @@ class ContentTypeHandler(QuietSimpleHTTPRequestHandler):
data_string = self.rfile.read(int(self.headers['Content-Length']))
self.wfile.write(bytes(data_string))
return
+ if "echo_multipart_file" in self.path:
+ self.protocol_version = 'HTTP/1.1'
+ self.send_response(200, 'OK')
+ self.send_header('Content-type',
+ 'multipart/form-data;boundary=boundary')
+ self.end_headers()
+ file_content = self.rfile.read(int(self.headers['Content-Length']))
+ self.wfile.write(
+ bytes('--boundary\t \r\n'
+ 'Content-Disposition: form-data; name="field_1"\r\n'
+ '\r\n'
+ 'value_1 \r\n'
+ '\r\n--boundary\r\n'
+ 'Content-Disposition: form-data; name="file"; '
+ 'filename="file.bin"\r\n'
+ 'Content-Type: application/octet-stream\r\n'
+ '\r\n') + bytes(file_content) +
+ bytes('\r\n--boundary--\r\n'))
+ return
self.protocol_version = 'HTTP/1.1'
self.send_response(501)
self.send_header('content-type', 'text/plain')