summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: snek <the@snek.dev> 2024-05-21 15:50:59 -0700
committer: GitHub <noreply@github.com> 2024-05-21 15:50:59 -0700
commit: 8f2d17140468512372ccd3aeebb0d505c607b977 (patch)
tree: b8a4e2d10a0cd3121d637e399b111b0c3dd8b32a
parent: db82e8b557c9836481618a73cec7c014903ff256 (diff)
feat(node): buffer isUtf8/isAscii (#23928)
Fixes: https://github.com/denoland/deno/issues/23657
Implements `isUtf8` and `isAscii` as ops.
-rw-r--r-- ext/node/lib.rs 2
-rw-r--r-- ext/node/ops/buffer.rs 13
-rw-r--r-- ext/node/ops/mod.rs 1
-rw-r--r-- ext/node/polyfills/buffer.ts 2
-rw-r--r-- ext/node/polyfills/internal/buffer.mjs 49
-rw-r--r-- ext/node/polyfills/internal/errors.ts 7
-rw-r--r-- tests/node_compat/config.jsonc 2
-rw-r--r-- tests/node_compat/runner/TODO.md 2
-rw-r--r-- tests/node_compat/test/parallel/test-buffer-isascii.js 49
-rw-r--r-- tests/node_compat/test/parallel/test-buffer-isutf8.js 93
-rwxr-xr-x tools/copyright_checker.js 1
11 files changed, 219 insertions, 2 deletions
diff --git a/ext/node/lib.rs b/ext/node/lib.rs
index b4eeb71c2..2b31f704f 100644
--- a/ext/node/lib.rs
+++ b/ext/node/lib.rs
@@ -186,6 +186,8 @@ deno_core::extension!(deno_node,
deps = [ deno_io, deno_fs ],
parameters = [P: NodePermissions],
ops = [
+ ops::buffer::op_is_ascii,
+ ops::buffer::op_is_utf8,
ops::crypto::op_node_create_decipheriv,
ops::crypto::op_node_cipheriv_encrypt,
ops::crypto::op_node_cipheriv_final,
diff --git a/ext/node/ops/buffer.rs b/ext/node/ops/buffer.rs
new file mode 100644
index 000000000..74a011ab8
--- /dev/null
+++ b/ext/node/ops/buffer.rs
@@ -0,0 +1,13 @@
+// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+
+use deno_core::op2;
+
+#[op2(fast)]
+pub fn op_is_ascii(#[buffer] buf: &[u8]) -> bool {
+ buf.is_ascii()
+}
+
+#[op2(fast)]
+pub fn op_is_utf8(#[buffer] buf: &[u8]) -> bool {
+ std::str::from_utf8(buf).is_ok()
+}
diff --git a/ext/node/ops/mod.rs b/ext/node/ops/mod.rs
index 6381530dd..ae703e3f3 100644
--- a/ext/node/ops/mod.rs
+++ b/ext/node/ops/mod.rs
@@ -1,5 +1,6 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+pub mod buffer;
pub mod crypto;
pub mod fs;
pub mod http;
diff --git a/ext/node/polyfills/buffer.ts b/ext/node/polyfills/buffer.ts
index 5925475c4..c5a910cb4 100644
--- a/ext/node/polyfills/buffer.ts
+++ b/ext/node/polyfills/buffer.ts
@@ -7,6 +7,8 @@ export {
Buffer,
constants,
default,
+ isAscii,
+ isUtf8,
kMaxLength,
kStringMaxLength,
SlowBuffer,
diff --git a/ext/node/polyfills/internal/buffer.mjs b/ext/node/polyfills/internal/buffer.mjs
index 5c76a21a5..0521c56aa 100644
--- a/ext/node/polyfills/internal/buffer.mjs
+++ b/ext/node/polyfills/internal/buffer.mjs
@@ -6,6 +6,7 @@
// deno-lint-ignore-file prefer-primordials
import { core } from "ext:core/mod.js";
+import { op_is_ascii, op_is_utf8 } from "ext:core/ops";
import { TextDecoder, TextEncoder } from "ext:deno_web/08_text_encoding.js";
import { codes } from "ext:deno_node/internal/error_codes.ts";
@@ -26,10 +27,12 @@ import {
import {
isAnyArrayBuffer,
isArrayBufferView,
+ isTypedArray,
} from "ext:deno_node/internal/util/types.ts";
import { normalizeEncoding } from "ext:deno_node/internal/util.mjs";
import { validateBuffer } from "ext:deno_node/internal/validators.mjs";
import { isUint8Array } from "ext:deno_node/internal/util/types.ts";
+import { ERR_INVALID_STATE } from "ext:deno_node/internal/errors.ts";
import {
forgivingBase64Encode,
forgivingBase64UrlEncode,
@@ -2536,12 +2539,58 @@ export function writeU_Int24LE(buf, value, offset, min, max) {
return offset;
}
+export function isUtf8(input) {
+ if (isTypedArray(input)) {
+ if (input.buffer.detached) {
+ throw new ERR_INVALID_STATE("Cannot validate on a detached buffer");
+ }
+ return op_is_utf8(input);
+ }
+
+ if (isAnyArrayBuffer(input)) {
+ if (input.detached) {
+ throw new ERR_INVALID_STATE("Cannot validate on a detached buffer");
+ }
+ return op_is_utf8(new Uint8Array(input));
+ }
+
+ throw new codes.ERR_INVALID_ARG_TYPE("input", [
+ "ArrayBuffer",
+ "Buffer",
+ "TypedArray",
+ ], input);
+}
+
+export function isAscii(input) {
+ if (isTypedArray(input)) {
+ if (input.buffer.detached) {
+ throw new ERR_INVALID_STATE("Cannot validate on a detached buffer");
+ }
+ return op_is_ascii(input);
+ }
+
+ if (isAnyArrayBuffer(input)) {
+ if (input.detached) {
+ throw new ERR_INVALID_STATE("Cannot validate on a detached buffer");
+ }
+ return op_is_ascii(new Uint8Array(input));
+ }
+
+ throw new codes.ERR_INVALID_ARG_TYPE("input", [
+ "ArrayBuffer",
+ "Buffer",
+ "TypedArray",
+ ], input);
+}
+
export default {
atob,
btoa,
Blob,
Buffer,
constants,
+ isAscii,
+ isUtf8,
kMaxLength,
kStringMaxLength,
SlowBuffer,
diff --git a/ext/node/polyfills/internal/errors.ts b/ext/node/polyfills/internal/errors.ts
index c3aeff8b2..a16656087 100644
--- a/ext/node/polyfills/internal/errors.ts
+++ b/ext/node/polyfills/internal/errors.ts
@@ -2564,6 +2564,12 @@ export class ERR_HTTP_SOCKET_ASSIGNED extends NodeError {
}
}
+export class ERR_INVALID_STATE extends NodeError {
+ constructor(message: string) {
+ super("ERR_INVALID_STATE", `Invalid state: ${message}`);
+ }
+}
+
interface UvExceptionContext {
syscall: string;
path?: string;
@@ -2824,6 +2830,7 @@ export default {
ERR_INVALID_RETURN_PROPERTY,
ERR_INVALID_RETURN_PROPERTY_VALUE,
ERR_INVALID_RETURN_VALUE,
+ ERR_INVALID_STATE,
ERR_INVALID_SYNC_FORK_INPUT,
ERR_INVALID_THIS,
ERR_INVALID_TUPLE,
diff --git a/tests/node_compat/config.jsonc b/tests/node_compat/config.jsonc
index af45cc663..56f43b444 100644
--- a/tests/node_compat/config.jsonc
+++ b/tests/node_compat/config.jsonc
@@ -179,7 +179,9 @@
"test-buffer-includes.js",
"test-buffer-indexof.js",
"test-buffer-inheritance.js",
+ "test-buffer-isascii.js",
"test-buffer-isencoding.js",
+ "test-buffer-isutf8.js",
"test-buffer-iterator.js",
"test-buffer-new.js",
"test-buffer-no-negative-allocation.js",
diff --git a/tests/node_compat/runner/TODO.md b/tests/node_compat/runner/TODO.md
index b812981e5..0bc19a076 100644
--- a/tests/node_compat/runner/TODO.md
+++ b/tests/node_compat/runner/TODO.md
@@ -227,8 +227,6 @@ NOTE: This file should not be manually edited. Please edit `tests/node_compat/co
- [parallel/test-buffer-constructor-outside-node-modules.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-constructor-outside-node-modules.js)
- [parallel/test-buffer-fill.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-fill.js)
- [parallel/test-buffer-inspect.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-inspect.js)
-- [parallel/test-buffer-isascii.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-isascii.js)
-- [parallel/test-buffer-isutf8.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-isutf8.js)
- [parallel/test-buffer-pending-deprecation.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-pending-deprecation.js)
- [parallel/test-buffer-pool-untransferable.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-pool-untransferable.js)
- [parallel/test-buffer-prototype-inspect.js](https://github.com/nodejs/node/tree/v18.12.1/test/parallel/test-buffer-prototype-inspect.js)
diff --git a/tests/node_compat/test/parallel/test-buffer-isascii.js b/tests/node_compat/test/parallel/test-buffer-isascii.js
new file mode 100644
index 000000000..59612bb9d
--- /dev/null
+++ b/tests/node_compat/test/parallel/test-buffer-isascii.js
@@ -0,0 +1,49 @@
+// deno-fmt-ignore-file
+// deno-lint-ignore-file
+
+// Copyright Joyent and Node contributors. All rights reserved. MIT license.
+// Taken from Node 18.12.1
+// This file is automatically generated by `tests/node_compat/runner/setup.ts`. Do not modify this file manually.
+
+'use strict';
+
+require('../common');
+const assert = require('assert');
+const { isAscii, Buffer } = require('buffer');
+const { TextEncoder } = require('util');
+
+const encoder = new TextEncoder();
+
+assert.strictEqual(isAscii(encoder.encode('hello')), true);
+assert.strictEqual(isAscii(encoder.encode('ğ')), false);
+assert.strictEqual(isAscii(Buffer.from([])), true);
+
+[
+ undefined,
+ '', 'hello',
+ false, true,
+ 0, 1,
+ 0n, 1n,
+ Symbol(),
+ () => {},
+ {}, [], null,
+].forEach((input) => {
+ assert.throws(
+ () => { isAscii(input); },
+ {
+ code: 'ERR_INVALID_ARG_TYPE',
+ },
+ );
+});
+
+{
+ // Test with detached array buffers
+ const arrayBuffer = new ArrayBuffer(1024);
+ structuredClone(arrayBuffer, { transfer: [arrayBuffer] });
+ assert.throws(
+ () => { isAscii(arrayBuffer); },
+ {
+ code: 'ERR_INVALID_STATE'
+ }
+ );
+}
diff --git a/tests/node_compat/test/parallel/test-buffer-isutf8.js b/tests/node_compat/test/parallel/test-buffer-isutf8.js
new file mode 100644
index 000000000..02f142db8
--- /dev/null
+++ b/tests/node_compat/test/parallel/test-buffer-isutf8.js
@@ -0,0 +1,93 @@
+// deno-fmt-ignore-file
+// deno-lint-ignore-file
+
+// Copyright Joyent and Node contributors. All rights reserved. MIT license.
+// Taken from Node 18.12.1
+// This file is automatically generated by `tests/node_compat/runner/setup.ts`. Do not modify this file manually.
+
+'use strict';
+
+require('../common');
+const assert = require('assert');
+const { isUtf8, Buffer } = require('buffer');
+const { TextEncoder } = require('util');
+
+const encoder = new TextEncoder();
+
+assert.strictEqual(isUtf8(encoder.encode('hello')), true);
+assert.strictEqual(isUtf8(encoder.encode('ğ')), true);
+assert.strictEqual(isUtf8(Buffer.from([])), true);
+
+// Taken from test/fixtures/wpt/encoding/textdecoder-fatal.any.js
+[
+ [0xFF], // 'invalid code'
+ [0xC0], // 'ends early'
+ [0xE0], // 'ends early 2'
+ [0xC0, 0x00], // 'invalid trail'
+ [0xC0, 0xC0], // 'invalid trail 2'
+ [0xE0, 0x00], // 'invalid trail 3'
+ [0xE0, 0xC0], // 'invalid trail 4'
+ [0xE0, 0x80, 0x00], // 'invalid trail 5'
+ [0xE0, 0x80, 0xC0], // 'invalid trail 6'
+ [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], // '> 0x10FFFF'
+ [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], // 'obsolete lead byte'
+
+ // Overlong encodings
+ [0xC0, 0x80], // 'overlong U+0000 - 2 bytes'
+ [0xE0, 0x80, 0x80], // 'overlong U+0000 - 3 bytes'
+ [0xF0, 0x80, 0x80, 0x80], // 'overlong U+0000 - 4 bytes'
+ [0xF8, 0x80, 0x80, 0x80, 0x80], // 'overlong U+0000 - 5 bytes'
+ [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], // 'overlong U+0000 - 6 bytes'
+
+ [0xC1, 0xBF], // 'overlong U+007F - 2 bytes'
+ [0xE0, 0x81, 0xBF], // 'overlong U+007F - 3 bytes'
+ [0xF0, 0x80, 0x81, 0xBF], // 'overlong U+007F - 4 bytes'
+ [0xF8, 0x80, 0x80, 0x81, 0xBF], // 'overlong U+007F - 5 bytes'
+ [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], // 'overlong U+007F - 6 bytes'
+
+ [0xE0, 0x9F, 0xBF], // 'overlong U+07FF - 3 bytes'
+ [0xF0, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 4 bytes'
+ [0xF8, 0x80, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 5 bytes'
+ [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], // 'overlong U+07FF - 6 bytes'
+
+ [0xF0, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 4 bytes'
+ [0xF8, 0x80, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 5 bytes'
+ [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], // 'overlong U+FFFF - 6 bytes'
+
+ [0xF8, 0x84, 0x8F, 0xBF, 0xBF], // 'overlong U+10FFFF - 5 bytes'
+ [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], // 'overlong U+10FFFF - 6 bytes'
+
+ // UTF-16 surrogates encoded as code points in UTF-8
+ [0xED, 0xA0, 0x80], // 'lead surrogate'
+ [0xED, 0xB0, 0x80], // 'trail surrogate'
+ [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], // 'surrogate pair'
+].forEach((input) => {
+ assert.strictEqual(isUtf8(Buffer.from(input)), false);
+});
+
+[
+ null,
+ undefined,
+ 'hello',
+ true,
+ false,
+].forEach((input) => {
+ assert.throws(
+ () => { isUtf8(input); },
+ {
+ code: 'ERR_INVALID_ARG_TYPE',
+ },
+ );
+});
+
+{
+ // Test with detached array buffers
+ const arrayBuffer = new ArrayBuffer(1024);
+ structuredClone(arrayBuffer, { transfer: [arrayBuffer] });
+ assert.throws(
+ () => { isUtf8(arrayBuffer); },
+ {
+ code: 'ERR_INVALID_STATE'
+ }
+ );
+}
diff --git a/tools/copyright_checker.js b/tools/copyright_checker.js
index 2cabab6fa..4ffde77c1 100755
--- a/tools/copyright_checker.js
+++ b/tools/copyright_checker.js
@@ -103,6 +103,7 @@ export async function checkCopyright() {
// show all the errors at the same time to prevent overlap with
// other running scripts that may be outputting
console.error(errors.join("\n"));
+ console.error(`Expected copyright:\n\`\`\`\n${COPYRIGHT_LINE}\n\`\`\``);
throw new Error(`Copyright checker had ${errors.length} errors.`);
}
}