summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bartek Iwańczuk <biwanczuk@gmail.com> 2020-03-19 21:31:56 +0100
committer GitHub <noreply@github.com> 2020-03-19 21:31:56 +0100
commit 87d2ba42bf0dedcd91059145bf8ab5941236354b (patch)
tree 52ad0906f8b41586cf9fb57ce02646100725b0c6
parent 392d2c11182332b8d3c168169b1585e3419cb1eb (diff)
perf: Optimize TextEncoder and TextDecoder (#4430)
* add tests for "Deno.core.encode" and "Deno.core.decode" for empty inputs
* use "Deno.core.encode" in "TextEncoder"
* use "Deno.core.decode" in "TextDecoder"
* remove "core_decode" and "core_encode" benchmarks
-rw-r--r-- cli/js/web/encode_utf8.ts 80
-rw-r--r-- cli/js/web/text_encoding.ts 16
-rw-r--r-- cli/tests/core_decode_perf.js 37
-rw-r--r-- cli/tests/core_encode_perf.js 32
-rw-r--r-- core/bindings.rs 15
-rw-r--r-- core/encode_decode_test.js 6
-rwxr-xr-x tools/benchmark.py 2
7 files changed, 32 insertions, 156 deletions
diff --git a/cli/js/web/encode_utf8.ts b/cli/js/web/encode_utf8.ts
deleted file mode 100644
index 04e2560b7..000000000
--- a/cli/js/web/encode_utf8.ts
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
-// The following code is based off:
-// https://github.com/samthor/fast-text-encoding
-//
-// Copyright 2017 Sam Thorogood. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License"); you may not
-// use this file except in compliance with the License. You may obtain a copy of
-// the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-// License for the specific language governing permissions and limitations under
-// the License.
-//
-
-export function encodeUtf8(input: string): Uint8Array {
- let pos = 0;
- const len = input.length;
-
- let at = 0; // output position
- let tlen = Math.max(32, len + (len >> 1) + 7); // 1.5x size
- let target = new Uint8Array((tlen >> 3) << 3); // ... but at 8 byte offset
-
- while (pos < len) {
- let value = input.charCodeAt(pos++);
- if (value >= 0xd800 && value <= 0xdbff) {
- // high surrogate
- if (pos < len) {
- const extra = input.charCodeAt(pos);
- if ((extra & 0xfc00) === 0xdc00) {
- ++pos;
- value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
- }
- }
- if (value >= 0xd800 && value <= 0xdbff) {
- continue; // drop lone surrogate
- }
- }
-
- // expand the buffer if we couldn't write 4 bytes
- if (at + 4 > target.length) {
- tlen += 8; // minimum extra
- tlen *= 1.0 + (pos / input.length) * 2; // take 2x the remaining
- tlen = (tlen >> 3) << 3; // 8 byte offset
-
- const update = new Uint8Array(tlen);
- update.set(target);
- target = update;
- }
-
- if ((value & 0xffffff80) === 0) {
- // 1-byte
- target[at++] = value; // ASCII
- continue;
- } else if ((value & 0xfffff800) === 0) {
- // 2-byte
- target[at++] = ((value >> 6) & 0x1f) | 0xc0;
- } else if ((value & 0xffff0000) === 0) {
- // 3-byte
- target[at++] = ((value >> 12) & 0x0f) | 0xe0;
- target[at++] = ((value >> 6) & 0x3f) | 0x80;
- } else if ((value & 0xffe00000) === 0) {
- // 4-byte
- target[at++] = ((value >> 18) & 0x07) | 0xf0;
- target[at++] = ((value >> 12) & 0x3f) | 0x80;
- target[at++] = ((value >> 6) & 0x3f) | 0x80;
- } else {
- // FIXME: do we care
- continue;
- }
-
- target[at++] = (value & 0x3f) | 0x80;
- }
-
- return target.slice(0, at);
-}
diff --git a/cli/js/web/text_encoding.ts b/cli/js/web/text_encoding.ts
index 2da53d934..5f04972aa 100644
--- a/cli/js/web/text_encoding.ts
+++ b/cli/js/web/text_encoding.ts
@@ -26,7 +26,7 @@
import * as base64 from "./base64.ts";
import { decodeUtf8 } from "./decode_utf8.ts";
import * as domTypes from "./dom_types.ts";
-import { encodeUtf8 } from "./encode_utf8.ts";
+import { core } from "../core.ts";
const CONTINUE = null;
const END_OF_STREAM = -1;
@@ -352,6 +352,15 @@ export class TextDecoder {
bytes = new Uint8Array(0);
}
+ // For simple utf-8 decoding "Deno.core.decode" can be used for performance
+ if (
+ this._encoding === "utf-8" &&
+ this.fatal === false &&
+ this.ignoreBOM === false
+ ) {
+ return core.decode(bytes);
+ }
+
// For performance reasons we utilise a highly optimised decoder instead of
// the general decoder.
if (this._encoding === "utf-8") {
@@ -396,10 +405,9 @@ interface TextEncoderEncodeIntoResult {
export class TextEncoder {
readonly encoding = "utf-8";
encode(input = ""): Uint8Array {
- // For performance reasons we utilise a highly optimised decoder instead of
- // the general decoder.
+ // Deno.core.encode() provides very efficient utf-8 encoding
if (this.encoding === "utf-8") {
- return encodeUtf8(input);
+ return core.encode(input);
}
const encoder = new UTF8Encoder();
diff --git a/cli/tests/core_decode_perf.js b/cli/tests/core_decode_perf.js
deleted file mode 100644
index fc00b8996..000000000
--- a/cli/tests/core_decode_perf.js
+++ /dev/null
@@ -1,37 +0,0 @@
-const mixed = new TextEncoder().encode("@Ā๐😀");
-
-function generateRandom(bytes) {
- const result = new Uint8Array(bytes);
- let i = 0;
- while (i < bytes) {
- const toAdd = Math.floor(Math.random() * Math.min(4, bytes - i));
- switch (toAdd) {
- case 0:
- result[i] = mixed[0];
- i++;
- break;
- case 1:
- result[i] = mixed[1];
- result[i + 1] = mixed[2];
- i += 2;
- break;
- case 2:
- result[i] = mixed[3];
- result[i + 1] = mixed[4];
- result[i + 2] = mixed[5];
- i += 3;
- break;
- case 3:
- result[i] = mixed[6];
- result[i + 1] = mixed[7];
- result[i + 2] = mixed[8];
- result[i + 3] = mixed[9];
- i += 4;
- break;
- }
- }
- return result;
-}
-
-const randomData = generateRandom(1024);
-for (let i = 0; i < 10_000; i++) Deno.core.decode(randomData);
diff --git a/cli/tests/core_encode_perf.js b/cli/tests/core_encode_perf.js
deleted file mode 100644
index 5cde81c7a..000000000
--- a/cli/tests/core_encode_perf.js
+++ /dev/null
@@ -1,32 +0,0 @@
-const mixed = "@Ā๐😀";
-
-function generateRandom(bytes) {
- let result = "";
- let i = 0;
- while (i < bytes) {
- const toAdd = Math.floor(Math.random() * Math.min(4, bytes - i));
- switch (toAdd) {
- case 0:
- result += mixed[0];
- i++;
- break;
- case 1:
- result += mixed[1];
- i++;
- break;
- case 2:
- result += mixed[2];
- i++;
- break;
- case 3:
- result += mixed[3];
- result += mixed[4];
- i += 2;
- break;
- }
- }
- return result;
-}
-
-const randomData = generateRandom(1024);
-for (let i = 0; i < 10_000; i++) Deno.core.encode(randomData);
diff --git a/core/bindings.rs b/core/bindings.rs
index 3745abf69..88bdf7f30 100644
--- a/core/bindings.rs
+++ b/core/bindings.rs
@@ -632,7 +632,20 @@ fn encode(
};
let text_str = text.to_rust_string_lossy(scope);
let text_bytes = text_str.as_bytes().to_vec().into_boxed_slice();
- let buf = boxed_slice_to_uint8array(scope, text_bytes);
+
+ let buf = if text_bytes.is_empty() {
+ let ab = v8::ArrayBuffer::new(scope, 0);
+ v8::Uint8Array::new(ab, 0, 0).expect("Failed to create UintArray8")
+ } else {
+ let buf_len = text_bytes.len();
+ let backing_store =
+ v8::ArrayBuffer::new_backing_store_from_boxed_slice(text_bytes);
+ let mut backing_store_shared = backing_store.make_shared();
+ let ab =
+ v8::ArrayBuffer::with_backing_store(scope, &mut backing_store_shared);
+ v8::Uint8Array::new(ab, 0, buf_len).expect("Failed to create UintArray8")
+ };
+
rv.set(buf.into())
}
diff --git a/core/encode_decode_test.js b/core/encode_decode_test.js
index 8a366dd66..294144593 100644
--- a/core/encode_decode_test.js
+++ b/core/encode_decode_test.js
@@ -27,12 +27,18 @@ function main() {
108, 100
];
+ const empty = Deno.core.encode("");
+ if (empty.length !== 0) throw new Error("assert");
+
assertArrayEquals(Array.from(Deno.core.encode("𝓽𝓮𝔁𝓽")), fixture1);
assertArrayEquals(
Array.from(Deno.core.encode("Hello \udc12\ud834 World")),
fixture2
);
+ const emptyBuf = Deno.core.decode(new Uint8Array(0));
+ if (emptyBuf !== "") throw new Error("assert");
+
assert(Deno.core.decode(new Uint8Array(fixture1)) === "๐“ฝ๐“ฎ๐”๐“ฝ");
assert(Deno.core.decode(new Uint8Array(fixture2)) === "Hello ๏ฟฝ๏ฟฝ World");
}
diff --git a/tools/benchmark.py b/tools/benchmark.py
index 3891bc207..c29ca3e8c 100755
--- a/tools/benchmark.py
+++ b/tools/benchmark.py
@@ -28,9 +28,7 @@ exec_time_benchmarks = [
("workers_startup", ["cli/tests/workers_startup_bench.ts"]),
("workers_round_robin", ["cli/tests/workers_round_robin_bench.ts"]),
("text_decoder", ["cli/tests/text_decoder_perf.js"]),
- ("core_decode", ["cli/tests/core_decode_perf.js"]),
("text_encoder", ["cli/tests/text_encoder_perf.js"]),
- ("core_encode", ["cli/tests/core_encode_perf.js"]),
]