summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marcos Casagrande <marcoscvp90@gmail.com> 2022-10-24 20:27:22 +0200
committer GitHub <noreply@github.com> 2022-10-24 20:27:22 +0200
commit ac5fcf626a77db7795f7ab2b4f15e4ecb3270171 (patch)
tree 1135cc4a5b4fed75a3f17bb0e3c12cd11c4abf43
parent 7a65b8e8dae8660f0e40130ba5a63f6c10d358c6 (diff)
perf(ext/web): add op_encode_binary_string (#16352)
Add a new op to use in `reader.readAsBinaryString(blob)`.

```
File API binary string: 400b     35.12 µs/iter   (21.93 µs … 3.27 ms)   31.87 µs  131.95 µs  217.63 µs
File API binary string: 4kb      46.49 µs/iter   (29.36 µs … 4.42 ms)    42.5 µs  122.48 µs   155.1 µs
File API binary string: 2.2mb     4.17 ms/iter    (1.75 ms … 8.54 ms)    5.48 ms    7.39 ms    8.54 ms
```

**main**

```
benchmark                        time (avg)      (min … max)                p75        p99       p995
--------------------------------------------------------------------- -----------------------------
File API binary string: 400b     56.17 µs/iter   (43.09 µs … 784.52 µs)   49.6 µs  177.18 µs  241.23 µs
File API binary string: 4kb      277.2 µs/iter   (240.29 µs … 1.84 ms)  269.87 µs  649.79 µs  774.46 µs
File API binary string: 2.2mb   180.03 ms/iter   (173.32 ms … 194.35 ms) 182.54 ms  194.35 ms  194.35 ms
```

It can also handle bigger files: when encoding a 200mb file, main crashes with OOM.

```
<--- Last few GCs --->
[132677:0x560504676550] 5012 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
[132677:0x560504676550] 5038 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
[132677:0x560504676550] 5064 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
```
-rw-r--r-- cli/tests/unit/text_encoding_test.ts 58
-rw-r--r-- ext/web/10_filereader.js 12
-rw-r--r-- ext/web/lib.rs 6
3 files changed, 66 insertions, 10 deletions
diff --git a/cli/tests/unit/text_encoding_test.ts b/cli/tests/unit/text_encoding_test.ts
index 70942d98d..06ec09048 100644
--- a/cli/tests/unit/text_encoding_test.ts
+++ b/cli/tests/unit/text_encoding_test.ts
@@ -247,6 +247,7 @@ Deno.test(function toStringShouldBeWebCompatibility() {
const decoder = new TextDecoder();
assertEquals(decoder.toString(), "[object TextDecoder]");
});
+
Deno.test(function textEncoderShouldCoerceToString() {
const encoder = new TextEncoder();
const fixutreText = "text";
@@ -261,3 +262,60 @@ Deno.test(function textEncoderShouldCoerceToString() {
const decoded = decoder.decode(bytes);
assertEquals(decoded, fixutreText);
});
+
+Deno.test(function binaryEncode() {
+ // @ts-ignore: Deno.core allowed
+ const ops = Deno.core.ops;
+ function asBinaryString(bytes: Uint8Array): string {
+ return Array.from(bytes).map(
+ (v: number) => String.fromCodePoint(v),
+ ).join("");
+ }
+
+ function decodeBinary(binaryString: string) {
+ const chars: string[] = Array.from(binaryString);
+ return chars.map((v: string): number | undefined => v.codePointAt(0));
+ }
+
+ // invalid utf-8 code points
+ const invalid = new Uint8Array([0xC0]);
+ assertEquals(
+ ops.op_encode_binary_string(invalid),
+ asBinaryString(invalid),
+ );
+
+ const invalid2 = new Uint8Array([0xC1]);
+ assertEquals(
+ ops.op_encode_binary_string(invalid2),
+ asBinaryString(invalid2),
+ );
+
+ for (let i = 0, j = 255; i <= 255; i++, j--) {
+ const bytes = new Uint8Array([i, j]);
+ const binaryString = ops.op_encode_binary_string(bytes);
+ assertEquals(
+ binaryString,
+ asBinaryString(bytes),
+ );
+ assertEquals(Array.from(bytes), decodeBinary(binaryString));
+ }
+
+ const inputs = [
+ "σ😀",
+ "Кириллица is Cyrillic",
+ "𝓽𝓮𝔁𝓽",
+ "lone𝄞\ud888surrogate",
+ "\udc00\ud800",
+ "\ud800",
+ ];
+ for (const input of inputs) {
+ const bytes = new TextEncoder().encode(input);
+ const binaryString = ops.op_encode_binary_string(bytes);
+ assertEquals(
+ binaryString,
+ asBinaryString(bytes),
+ );
+
+ assertEquals(Array.from(bytes), decodeBinary(binaryString));
+ }
+});
diff --git a/ext/web/10_filereader.js b/ext/web/10_filereader.js
index 8a76b2e0f..49f4babe1 100644
--- a/ext/web/10_filereader.js
+++ b/ext/web/10_filereader.js
@@ -13,6 +13,7 @@
"use strict";
((window) => {
+ const core = window.Deno.core;
const webidl = window.__bootstrap.webidl;
const { forgivingBase64Encode } = window.__bootstrap.infra;
const { ProgressEvent } = window.__bootstrap.event;
@@ -21,8 +22,6 @@
const { parseMimeType } = window.__bootstrap.mimesniff;
const { DOMException } = window.__bootstrap.domException;
const {
- ArrayPrototypeJoin,
- ArrayPrototypeMap,
ArrayPrototypePush,
ArrayPrototypeReduce,
FunctionPrototypeCall,
@@ -33,7 +32,6 @@
ObjectPrototypeIsPrototypeOf,
queueMicrotask,
SafeArrayIterator,
- StringFromCodePoint,
Symbol,
TypedArrayPrototypeSet,
TypeError,
@@ -170,13 +168,7 @@
break;
}
case "BinaryString":
- this[result] = ArrayPrototypeJoin(
- ArrayPrototypeMap(
- [...new Uint8Array(bytes.buffer)],
- (v) => StringFromCodePoint(v),
- ),
- "",
- );
+ this[result] = core.ops.op_encode_binary_string(bytes);
break;
case "Text": {
let decoder = undefined;
diff --git a/ext/web/lib.rs b/ext/web/lib.rs
index 85e32b70a..8a9d3e18c 100644
--- a/ext/web/lib.rs
+++ b/ext/web/lib.rs
@@ -94,6 +94,7 @@ pub fn init<P: TimersPermission + 'static>(
op_encoding_new_decoder::decl(),
op_encoding_decode::decl(),
op_encoding_encode_into::decl(),
+ op_encode_binary_string::decl(),
op_blob_create_part::decl(),
op_blob_slice_part::decl(),
op_blob_read_part::decl(),
@@ -337,6 +338,11 @@ fn op_encoding_encode_into(
Ok(())
}
+#[op]
+fn op_encode_binary_string(s: &[u8]) -> ByteString {
+ ByteString::from(s)
+}
+
/// Creates a [`CancelHandle`] resource that can be used to cancel invocations of certain ops.
#[op(fast)]
pub fn op_cancel_handle(state: &mut OpState) -> u32 {