diff options
author | Marcos Casagrande <marcoscvp90@gmail.com> | 2022-10-24 20:27:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-24 20:27:22 +0200 |
commit | ac5fcf626a77db7795f7ab2b4f15e4ecb3270171 (patch) | |
tree | 1135cc4a5b4fed75a3f17bb0e3c12cd11c4abf43 /cli/tests/unit/text_encoding_test.ts | |
parent | 7a65b8e8dae8660f0e40130ba5a63f6c10d358c6 (diff) |
perf(ext/web): add op_encode_binary_string (#16352)
Add a new op to use in `reader.readAsBinaryString(blob)`.

**this PR**
```
File API binary string: 400b 35.12 µs/iter (21.93 µs … 3.27 ms) 31.87 µs 131.95 µs 217.63 µs
File API binary string: 4kb 46.49 µs/iter (29.36 µs … 4.42 ms) 42.5 µs 122.48 µs 155.1 µs
File API binary string: 2.2mb 4.17 ms/iter (1.75 ms … 8.54 ms) 5.48 ms 7.39 ms 8.54 ms
```
**main**
```
benchmark time (avg) (min … max) p75 p99 p995
--------------------------------------------------------------------- -----------------------------
File API binary string: 400b 56.17 µs/iter (43.09 µs … 784.52 µs) 49.6 µs 177.18 µs 241.23 µs
File API binary string: 4kb 277.2 µs/iter (240.29 µs … 1.84 ms) 269.87 µs 649.79 µs 774.46 µs
File API binary string: 2.2mb 180.03 ms/iter (173.32 ms … 194.35 ms) 182.54 ms 194.35 ms 194.35 ms
```
It can also handle bigger files: when encoding a 200mb file, main
crashes with OOM:
```
<--- Last few GCs --->
[132677:0x560504676550] 5012 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
[132677:0x560504676550] 5038 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
[132677:0x560504676550] 5064 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms (average mu = 0.824, current mu = 0.825) allocation failure;
```
Diffstat (limited to 'cli/tests/unit/text_encoding_test.ts')
-rw-r--r-- | cli/tests/unit/text_encoding_test.ts | 58 |
1 files changed, 58 insertions, 0 deletions
```diff
diff --git a/cli/tests/unit/text_encoding_test.ts b/cli/tests/unit/text_encoding_test.ts
index 70942d98d..06ec09048 100644
--- a/cli/tests/unit/text_encoding_test.ts
+++ b/cli/tests/unit/text_encoding_test.ts
@@ -247,6 +247,7 @@ Deno.test(function toStringShouldBeWebCompatibility() {
   const decoder = new TextDecoder();
   assertEquals(decoder.toString(), "[object TextDecoder]");
 });
+
 Deno.test(function textEncoderShouldCoerceToString() {
   const encoder = new TextEncoder();
   const fixutreText = "text";
@@ -261,3 +262,60 @@ Deno.test(function textEncoderShouldCoerceToString() {
   const decoded = decoder.decode(bytes);
   assertEquals(decoded, fixutreText);
 });
+
+Deno.test(function binaryEncode() {
+  // @ts-ignore: Deno.core allowed
+  const ops = Deno.core.ops;
+  function asBinaryString(bytes: Uint8Array): string {
+    return Array.from(bytes).map(
+      (v: number) => String.fromCodePoint(v),
+    ).join("");
+  }
+
+  function decodeBinary(binaryString: string) {
+    const chars: string[] = Array.from(binaryString);
+    return chars.map((v: string): number | undefined => v.codePointAt(0));
+  }
+
+  // invalid utf-8 code points
+  const invalid = new Uint8Array([0xC0]);
+  assertEquals(
+    ops.op_encode_binary_string(invalid),
+    asBinaryString(invalid),
+  );
+
+  const invalid2 = new Uint8Array([0xC1]);
+  assertEquals(
+    ops.op_encode_binary_string(invalid2),
+    asBinaryString(invalid2),
+  );
+
+  for (let i = 0, j = 255; i <= 255; i++, j--) {
+    const bytes = new Uint8Array([i, j]);
+    const binaryString = ops.op_encode_binary_string(bytes);
+    assertEquals(
+      binaryString,
+      asBinaryString(bytes),
+    );
+    assertEquals(Array.from(bytes), decodeBinary(binaryString));
+  }
+
+  const inputs = [
+    "σ😀",
+    "Кириллица is Cyrillic",
+    "𝓽𝓮𝔁𝓽",
+    "lone𝄞\ud888surrogate",
+    "\udc00\ud800",
+    "\ud800",
+  ];
+  for (const input of inputs) {
+    const bytes = new TextEncoder().encode(input);
+    const binaryString = ops.op_encode_binary_string(bytes);
+    assertEquals(
+      binaryString,
+      asBinaryString(bytes),
+    );
+
+    assertEquals(Array.from(bytes), decodeBinary(binaryString));
+  }
+});
```