diff options
author | Aapo Alasuutari <aapo.alasuutari@gmail.com> | 2023-09-07 23:41:16 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-07 14:41:16 -0600 |
commit | 9d6584c16f8c759f321ab82a3b9d03d0e2e1d363 (patch) | |
tree | b75d3babc6558ead83f933913d50c14bbd5de945 /ext/node/polyfills/internal_binding/_utils.ts | |
parent | 29784df24eefcbe3ce015c8621c20b0bdbc54090 (diff) |
perf(ext/node): Optimise Buffer string operations (#20158)
Extracted from https://github.com/denoland/deno/pull/17815
Optimise Buffer's string operations, most significantly when dealing
with ASCII and UTF-16. Base64 and HEX encodings are affected to much
lesser degrees.
## Performance
### String length 15
With very small strings we're at break-even or sometimes even lose a bit
of performance from creating a `DataView` that ends up not paying
for itself.
**This PR:**
```
benchmark time (avg) iter/s (min … max) p75 p99 p995
-------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string 1.15 µs/iter 871,388.6 (728.78 ns … 1.56 µs) 1.23 µs 1.56 µs 1.56 µs
Buffer.from base64 string 1.63 µs/iter 612,790.9 (1.31 µs … 1.96 µs) 1.77 µs 1.96 µs 1.96 µs
Buffer.from utf16 string 1.41 µs/iter 707,396.3 (915.24 ns … 1.93 µs) 1.61 µs 1.93 µs 1.93 µs
Buffer.from hex string 1.87 µs/iter 535,357.9 (1.56 µs … 2.19 µs) 2 µs 2.19 µs 2.19 µs
Buffer.toString ascii string 154.58 ns/iter 6,469,162.8 (149.69 ns … 198 ns) 154.51 ns 182.89 ns 191.91 ns
Buffer.toString base64 string 161.65 ns/iter 6,186,189.6 (150.91 ns … 181.15 ns) 165.18 ns 171.87 ns 174.94 ns
Buffer.toString utf16 string 292.74 ns/iter 3,415,959.8 (285.43 ns … 312.47 ns) 295.25 ns 310.47 ns 312.47 ns
Buffer.toString hex string 89.61 ns/iter 11,159,315.6 (81.09 ns … 123.77 ns) 91.09 ns 113.62 ns 119.28 ns
```
**Main:**
```
benchmark time (avg) iter/s (min … max) p75 p99 p995
-------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string 1.26 µs/iter 794,875.8 (1.07 µs … 1.46 µs) 1.31 µs 1.46 µs 1.46 µs
Buffer.from base64 string 1.65 µs/iter 607,853.3 (1.38 µs … 2.01 µs) 1.69 µs 2.01 µs 2.01 µs
Buffer.from utf16 string 1.34 µs/iter 744,894.6 (1.09 µs … 1.55 µs) 1.45 µs 1.55 µs 1.55 µs
Buffer.from hex string 2.01 µs/iter 496,345.8 (1.54 µs … 2.6 µs) 2.26 µs 2.6 µs 2.6 µs
Buffer.toString ascii string 150.16 ns/iter 6,659,630.5 (144.99 ns … 166.68 ns) 152.4 ns 157.26 ns 159.14 ns
Buffer.toString base64 string 164.73 ns/iter 6,070,692.0 (158.77 ns … 185.63 ns) 168.48 ns 175.74 ns 176.68 ns
Buffer.toString utf16 string 150.61 ns/iter 6,639,864.0 (148.2 ns … 168.29 ns) 150.93 ns 157.21 ns 168.15 ns
Buffer.toString hex string 94.21 ns/iter 10,614,972.9 (86.21 ns … 98.75 ns) 95.43 ns 97.99 ns 98.21 ns
```
### String length 1500
With moderate lengths we already see great upsides for `Buffer.from()`
with ASCII and UTF-16.
**This PR:**
```
benchmark time (avg) iter/s (min … max) p75 p99 p995
-------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string 5.79 µs/iter 172,562.6 (4.72 µs … 4.71 ms) 5.04 µs 10.3 µs 11.67 µs
Buffer.from base64 string 5.08 µs/iter 196,678.9 (4.97 µs … 5.76 µs) 5.08 µs 5.76 µs 5.76 µs
Buffer.from utf16 string 9.68 µs/iter 103,316.5 (7.14 µs … 3.44 ms) 10.32 µs 13.42 µs 15.21 µs
Buffer.from hex string 53.7 µs/iter 18,620.2 (49.37 µs … 2.2 ms) 54.74 µs 72.2 µs 81.07 µs
Buffer.toString ascii string 6.63 µs/iter 150,761.3 (5.59 µs … 1.11 ms) 6.08 µs 15.68 µs 24.77 µs
Buffer.toString base64 string 460.57 ns/iter 2,171,224.4 (448.33 ns … 511.73 ns) 465.05 ns 495.54 ns 511.73 ns
Buffer.toString utf16 string 6.52 µs/iter 153,287.0 (6.47 µs … 6.66 µs) 6.53 µs 6.66 µs 6.66 µs
Buffer.toString hex string 3.68 µs/iter 271,965.4 (3.64 µs … 3.82 µs) 3.68 µs 3.82 µs 3.82 µs
```
**Main:**
```
benchmark time (avg) iter/s (min … max) p75 p99 p995
-------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string 11.46 µs/iter 87,298.1 (8.53 µs … 834.1 µs) 9.61 µs 83.31 µs 87.3 µs
Buffer.from base64 string 5.4 µs/iter 185,027.8 (5.07 µs … 7.49 µs) 5.44 µs 7.49 µs 7.49 µs
Buffer.from utf16 string 20.3 µs/iter 49,270.8 (13.55 µs … 649.11 µs) 18.8 µs 113.93 µs 125.17 µs
Buffer.from hex string 52.03 µs/iter 19,218.9 (48.74 µs … 2.59 ms) 52.84 µs 67.05 µs 73.56 µs
Buffer.toString ascii string 6.46 µs/iter 154,822.5 (6.32 µs … 6.69 µs) 6.52 µs 6.69 µs 6.69 µs
Buffer.toString base64 string 440.19 ns/iter 2,271,764.6 (427 ns … 490.77 ns) 444.74 ns 484.64 ns 490.77 ns
Buffer.toString utf16 string 6.89 µs/iter 145,106.7 (6.81 µs … 7.24 µs) 6.91 µs 7.24 µs 7.24 µs
Buffer.toString hex string 3.66 µs/iter 273,456.5 (3.6 µs … 4.02 µs) 3.64 µs 4.02 µs 4.02 µs
```
### String length 2^20
With massive lengths the difference in ASCII and UTF-16 parsing
performance is enormous.
**This PR:**
```
benchmark time (avg) iter/s (min … max) p75 p99 p995
-------------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string 4.1 ms/iter 243.7 (2.64 ms … 6.74 ms) 4.43 ms 6.26 ms 6.74 ms
Buffer.from base64 string 3.74 ms/iter 267.6 (2.91 ms … 4.92 ms) 3.96 ms 4.31 ms 4.92 ms
Buffer.from utf16 string 7.72 ms/iter 129.5 (5.91 ms … 11.03 ms) 7.97 ms 11.03 ms 11.03 ms
Buffer.from hex string 35.72 ms/iter 28.0 (34.71 ms … 38.42 ms) 35.93 ms 38.42 ms 38.42 ms
Buffer.toString ascii string 78.92 ms/iter 12.7 (42.72 ms … 94.13 ms) 91.64 ms 94.13 ms 94.13 ms
Buffer.toString base64 string 833.62 µs/iter 1,199.6 (638.05 µs … 5.97 ms) 826.86 µs 2.45 ms 2.48 ms
Buffer.toString utf16 string 79.35 ms/iter 12.6 (69.72 ms … 88.9 ms) 86.66 ms 88.9 ms 88.9 ms
Buffer.toString hex string 31.04 ms/iter 32.2 (4.3 ms … 46.9 ms) 37.21 ms 46.9 ms 46.9 ms
```
**Main:**
```
benchmark time (avg) iter/s (min … max) p75 p99 p995
-------------------------------------------------------------------------------------------------------------------- -----------------------------
Buffer.from ascii string 18.66 ms/iter 53.6 (15.61 ms … 23.26 ms) 20.62 ms 23.26 ms 23.26 ms
Buffer.from base64 string 4.7 ms/iter 212.9 (2.94 ms … 9.07 ms) 4.65 ms 9.06 ms 9.07 ms
Buffer.from utf16 string 33.49 ms/iter 29.9 (31.24 ms … 35.67 ms) 34.08 ms 35.67 ms 35.67 ms
Buffer.from hex string 39.38 ms/iter 25.4 (38.66 ms … 42.36 ms) 39.58 ms 42.36 ms 42.36 ms
Buffer.toString ascii string 77.68 ms/iter 12.9 (67.46 ms … 95.68 ms) 84.71 ms 95.68 ms 95.68 ms
Buffer.toString base64 string 825.53 µs/iter 1,211.3 (655.38 µs … 6.69 ms) 816.62 µs 3.07 ms 3.13 ms
Buffer.toString utf16 string 76.54 ms/iter 13.1 (66.9 ms … 85.26 ms) 83.63 ms 85.26 ms 85.26 ms
Buffer.toString hex string 38.56 ms/iter 25.9 (33.83 ms … 46.56 ms) 45.33 ms 46.56 ms 46.56 ms
```
Diffstat (limited to 'ext/node/polyfills/internal_binding/_utils.ts')
-rw-r--r-- | ext/node/polyfills/internal_binding/_utils.ts | 86 |
1 files changed, 51 insertions, 35 deletions
diff --git a/ext/node/polyfills/internal_binding/_utils.ts b/ext/node/polyfills/internal_binding/_utils.ts index d543fd372..ab174608b 100644 --- a/ext/node/polyfills/internal_binding/_utils.ts +++ b/ext/node/polyfills/internal_binding/_utils.ts @@ -9,11 +9,12 @@ import { } from "ext:deno_web/00_infra.js"; export function asciiToBytes(str: string) { - const byteArray = []; - for (let i = 0; i < str.length; ++i) { - byteArray.push(str.charCodeAt(i) & 255); + const length = str.length; + const byteArray = new Uint8Array(length); + for (let i = 0; i < length; ++i) { + byteArray[i] = str.charCodeAt(i) & 255; } - return new Uint8Array(byteArray); + return byteArray; } export function base64ToBytes(str: string) { @@ -25,16 +26,26 @@ export function base64ToBytes(str: string) { const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g; function base64clean(str: string) { // Node takes equal signs as end of the Base64 encoding - str = str.split("=")[0]; + const eqIndex = str.indexOf("="); + str = eqIndex !== -1 ? 
str.substring(0, eqIndex).trimStart() : str.trim(); // Node strips out invalid characters like \n and \t from the string, std/base64 does not - str = str.trim().replace(INVALID_BASE64_RE, ""); + str = str.replace(INVALID_BASE64_RE, ""); // Node converts strings with length < 2 to '' - if (str.length < 2) return ""; + const length = str.length; + if (length < 2) return ""; // Node allows for non-padded base64 strings (missing trailing ===), std/base64 does not - while (str.length % 4 !== 0) { - str = str + "="; + switch (length % 4) { + case 0: + return str; + case 1: + return `${str}===`; + case 2: + return `${str}==`; + case 3: + return `${str}=`; + default: + throw new Error("Unexpected NaN value for string length"); } - return str; } export function base64UrlToBytes(str: string) { @@ -44,9 +55,10 @@ export function base64UrlToBytes(str: string) { } export function hexToBytes(str: string) { - const byteArray = new Uint8Array(Math.floor((str || "").length / 2)); - let i; - for (i = 0; i < byteArray.length; i++) { + const length = str.length >>> 1; + const byteArray = new Uint8Array(length); + let i: number; + for (i = 0; i < length; i++) { const a = Number.parseInt(str[i * 2], 16); const b = Number.parseInt(str[i * 2 + 1], 16); if (Number.isNaN(a) && Number.isNaN(b)) { @@ -54,39 +66,43 @@ export function hexToBytes(str: string) { } byteArray[i] = (a << 4) | b; } - return new Uint8Array( - i === byteArray.length ? byteArray : byteArray.slice(0, i), - ); + // Returning a buffer subarray is okay: This API's return value + // is never exposed to users and is only ever used for its length + // and the data within the subarray. + return i === length ? 
byteArray : byteArray.subarray(0, i); } -export function utf16leToBytes(str: string, units: number) { - let c, hi, lo; - const byteArray = []; - for (let i = 0; i < str.length; ++i) { - if ((units -= 2) < 0) { - break; - } - c = str.charCodeAt(i); - hi = c >> 8; - lo = c % 256; - byteArray.push(lo); - byteArray.push(hi); +export function utf16leToBytes(str: string, units?: number) { + // If units is defined, round it to even values for 16 byte "steps" + // and use it as an upper bound value for our string byte array's length. + const length = Math.min(str.length * 2, units ? (units >>> 1) * 2 : Infinity); + const byteArray = new Uint8Array(length); + const view = new DataView(byteArray.buffer); + let i: number; + for (i = 0; i * 2 < length; i++) { + view.setUint16(i * 2, str.charCodeAt(i), true); } - return new Uint8Array(byteArray); + // Returning a buffer subarray is okay: This API's return value + // is never exposed to users and is only ever used for its length + // and the data within the subarray. + return i * 2 === length ? byteArray : byteArray.subarray(0, i * 2); } export function bytesToAscii(bytes: Uint8Array) { - let ret = ""; - for (let i = 0; i < bytes.length; ++i) { - ret += String.fromCharCode(bytes[i] & 127); + let res = ""; + const length = bytes.byteLength; + for (let i = 0; i < length; ++i) { + res = `${res}${String.fromCharCode(bytes[i] & 127)}`; } - return ret; + return res; } export function bytesToUtf16le(bytes: Uint8Array) { let res = ""; - for (let i = 0; i < bytes.length - 1; i += 2) { - res += String.fromCharCode(bytes[i] + bytes[i + 1] * 256); + const length = bytes.byteLength; + const view = new DataView(bytes.buffer, bytes.byteOffset, length); + for (let i = 0; i < length - 1; i += 2) { + res = `${res}${String.fromCharCode(view.getUint16(i, true))}`; } return res; } |