diff options
-rw-r--r-- | cli/js/web/encode_utf8.ts | 80 | ||||
-rw-r--r-- | cli/js/web/text_encoding.ts | 16 | ||||
-rw-r--r-- | cli/tests/core_decode_perf.js | 37 | ||||
-rw-r--r-- | cli/tests/core_encode_perf.js | 32 | ||||
-rw-r--r-- | core/bindings.rs | 15 | ||||
-rw-r--r-- | core/encode_decode_test.js | 6 | ||||
-rwxr-xr-x | tools/benchmark.py | 2 |
7 files changed, 32 insertions, 156 deletions
diff --git a/cli/js/web/encode_utf8.ts b/cli/js/web/encode_utf8.ts deleted file mode 100644 index 04e2560b7..000000000 --- a/cli/js/web/encode_utf8.ts +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. -// The following code is based off: -// https://github.com/samthor/fast-text-encoding -// -// Copyright 2017 Sam Thorogood. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); you may not -// use this file except in compliance with the License. You may obtain a copy of -// the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -// License for the specific language governing permissions and limitations under -// the License. -// - -export function encodeUtf8(input: string): Uint8Array { - let pos = 0; - const len = input.length; - - let at = 0; // output position - let tlen = Math.max(32, len + (len >> 1) + 7); // 1.5x size - let target = new Uint8Array((tlen >> 3) << 3); // ... 
but at 8 byte offset - - while (pos < len) { - let value = input.charCodeAt(pos++); - if (value >= 0xd800 && value <= 0xdbff) { - // high surrogate - if (pos < len) { - const extra = input.charCodeAt(pos); - if ((extra & 0xfc00) === 0xdc00) { - ++pos; - value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; - } - } - if (value >= 0xd800 && value <= 0xdbff) { - continue; // drop lone surrogate - } - } - - // expand the buffer if we couldn't write 4 bytes - if (at + 4 > target.length) { - tlen += 8; // minimum extra - tlen *= 1.0 + (pos / input.length) * 2; // take 2x the remaining - tlen = (tlen >> 3) << 3; // 8 byte offset - - const update = new Uint8Array(tlen); - update.set(target); - target = update; - } - - if ((value & 0xffffff80) === 0) { - // 1-byte - target[at++] = value; // ASCII - continue; - } else if ((value & 0xfffff800) === 0) { - // 2-byte - target[at++] = ((value >> 6) & 0x1f) | 0xc0; - } else if ((value & 0xffff0000) === 0) { - // 3-byte - target[at++] = ((value >> 12) & 0x0f) | 0xe0; - target[at++] = ((value >> 6) & 0x3f) | 0x80; - } else if ((value & 0xffe00000) === 0) { - // 4-byte - target[at++] = ((value >> 18) & 0x07) | 0xf0; - target[at++] = ((value >> 12) & 0x3f) | 0x80; - target[at++] = ((value >> 6) & 0x3f) | 0x80; - } else { - // FIXME: do we care - continue; - } - - target[at++] = (value & 0x3f) | 0x80; - } - - return target.slice(0, at); -} diff --git a/cli/js/web/text_encoding.ts b/cli/js/web/text_encoding.ts index 2da53d934..5f04972aa 100644 --- a/cli/js/web/text_encoding.ts +++ b/cli/js/web/text_encoding.ts @@ -26,7 +26,7 @@ import * as base64 from "./base64.ts"; import { decodeUtf8 } from "./decode_utf8.ts"; import * as domTypes from "./dom_types.ts"; -import { encodeUtf8 } from "./encode_utf8.ts"; +import { core } from "../core.ts"; const CONTINUE = null; const END_OF_STREAM = -1; @@ -352,6 +352,15 @@ export class TextDecoder { bytes = new Uint8Array(0); } + // For simple utf-8 decoding "Deno.core.decode" can be used for 
performance + if ( + this._encoding === "utf-8" && + this.fatal === false && + this.ignoreBOM === false + ) { + return core.decode(bytes); + } + // For performance reasons we utilise a highly optimised decoder instead of // the general decoder. if (this._encoding === "utf-8") { @@ -396,10 +405,9 @@ interface TextEncoderEncodeIntoResult { export class TextEncoder { readonly encoding = "utf-8"; encode(input = ""): Uint8Array { - // For performance reasons we utilise a highly optimised decoder instead of - // the general decoder. + // Deno.core.encode() provides very efficient utf-8 encoding if (this.encoding === "utf-8") { - return encodeUtf8(input); + return core.encode(input); } const encoder = new UTF8Encoder(); diff --git a/cli/tests/core_decode_perf.js b/cli/tests/core_decode_perf.js deleted file mode 100644 index fc00b8996..000000000 --- a/cli/tests/core_decode_perf.js +++ /dev/null @@ -1,37 +0,0 @@ -const mixed = new TextEncoder().encode("@Ā๐😀"); - -function generateRandom(bytes) { - const result = new Uint8Array(bytes); - let i = 0; - while (i < bytes) { - const toAdd = Math.floor(Math.random() * Math.min(4, bytes - i)); - switch (toAdd) { - case 0: - result[i] = mixed[0]; - i++; - break; - case 1: - result[i] = mixed[1]; - result[i + 1] = mixed[2]; - i += 2; - break; - case 2: - result[i] = mixed[3]; - result[i + 1] = mixed[4]; - result[i + 2] = mixed[5]; - i += 3; - break; - case 3: - result[i] = mixed[6]; - result[i + 1] = mixed[7]; - result[i + 2] = mixed[8]; - result[i + 3] = mixed[9]; - i += 4; - break; - } - } - return result; -} - -const randomData = generateRandom(1024); -for (let i = 0; i < 10_000; i++) Deno.core.decode(randomData); diff --git a/cli/tests/core_encode_perf.js b/cli/tests/core_encode_perf.js deleted file mode 100644 index 5cde81c7a..000000000 --- a/cli/tests/core_encode_perf.js +++ /dev/null @@ -1,32 +0,0 @@ -const mixed = "@Ā๐😀"; - -function generateRandom(bytes) { - let result = ""; - let i = 0; - while (i < bytes) { - const toAdd 
= Math.floor(Math.random() * Math.min(4, bytes - i)); - switch (toAdd) { - case 0: - result += mixed[0]; - i++; - break; - case 1: - result += mixed[1]; - i++; - break; - case 2: - result += mixed[2]; - i++; - break; - case 3: - result += mixed[3]; - result += mixed[4]; - i += 2; - break; - } - } - return result; -} - -const randomData = generateRandom(1024); -for (let i = 0; i < 10_000; i++) Deno.core.encode(randomData); diff --git a/core/bindings.rs b/core/bindings.rs index 3745abf69..88bdf7f30 100644 --- a/core/bindings.rs +++ b/core/bindings.rs @@ -632,7 +632,20 @@ fn encode( }; let text_str = text.to_rust_string_lossy(scope); let text_bytes = text_str.as_bytes().to_vec().into_boxed_slice(); - let buf = boxed_slice_to_uint8array(scope, text_bytes); + + let buf = if text_bytes.is_empty() { + let ab = v8::ArrayBuffer::new(scope, 0); + v8::Uint8Array::new(ab, 0, 0).expect("Failed to create UintArray8") + } else { + let buf_len = text_bytes.len(); + let backing_store = + v8::ArrayBuffer::new_backing_store_from_boxed_slice(text_bytes); + let mut backing_store_shared = backing_store.make_shared(); + let ab = + v8::ArrayBuffer::with_backing_store(scope, &mut backing_store_shared); + v8::Uint8Array::new(ab, 0, buf_len).expect("Failed to create UintArray8") + }; + rv.set(buf.into()) } diff --git a/core/encode_decode_test.js b/core/encode_decode_test.js index 8a366dd66..294144593 100644 --- a/core/encode_decode_test.js +++ b/core/encode_decode_test.js @@ -27,12 +27,18 @@ function main() { 108, 100 ]; + const empty = Deno.core.encode(""); + if (empty.length !== 0) throw new Error("assert"); + assertArrayEquals(Array.from(Deno.core.encode("๐ฝ๐ฎ๐๐ฝ")), fixture1); assertArrayEquals( Array.from(Deno.core.encode("Hello \udc12\ud834 World")), fixture2 ); + const emptyBuf = Deno.core.decode(new Uint8Array(0)); + if (emptyBuf !== "") throw new Error("assert"); + assert(Deno.core.decode(new Uint8Array(fixture1)) === "๐ฝ๐ฎ๐๐ฝ"); assert(Deno.core.decode(new Uint8Array(fixture2)) === 
"Hello ๏ฟฝ๏ฟฝ World"); } diff --git a/tools/benchmark.py b/tools/benchmark.py index 3891bc207..c29ca3e8c 100755 --- a/tools/benchmark.py +++ b/tools/benchmark.py @@ -28,9 +28,7 @@ exec_time_benchmarks = [ ("workers_startup", ["cli/tests/workers_startup_bench.ts"]), ("workers_round_robin", ["cli/tests/workers_round_robin_bench.ts"]), ("text_decoder", ["cli/tests/text_decoder_perf.js"]), - ("core_decode", ["cli/tests/core_decode_perf.js"]), ("text_encoder", ["cli/tests/text_encoder_perf.js"]), - ("core_encode", ["cli/tests/core_encode_perf.js"]), ] |