summaryrefslogtreecommitdiff
path: root/ext/node
diff options
context:
space:
mode:
authorSatya Rohith <me@satyarohith.com>2024-10-02 13:53:14 +0530
committerGitHub <noreply@github.com>2024-10-02 08:23:14 +0000
commit32c12787361b65bbc55a7b9c1fe43689cb0a8b98 (patch)
tree047f47b5146b9192d74bd8ef1d9af9d183b7aad1 /ext/node
parent620e6b43a66c2af44ae4aea62417af408309f61c (diff)
feat(ext/node): buffer.transcode() (#25972)
Closes https://github.com/denoland/deno/issues/25911
Diffstat (limited to 'ext/node')
-rw-r--r--ext/node/lib.rs1
-rw-r--r--ext/node/ops/buffer.rs106
-rw-r--r--ext/node/polyfills/buffer.ts1
-rw-r--r--ext/node/polyfills/internal/buffer.mjs51
4 files changed, 157 insertions, 2 deletions
diff --git a/ext/node/lib.rs b/ext/node/lib.rs
index 0c821ecf8..d23c07204 100644
--- a/ext/node/lib.rs
+++ b/ext/node/lib.rs
@@ -167,6 +167,7 @@ deno_core::extension!(deno_node,
ops::buffer::op_is_ascii,
ops::buffer::op_is_utf8,
+ ops::buffer::op_transcode,
ops::crypto::op_node_check_prime_async,
ops::crypto::op_node_check_prime_bytes_async,
ops::crypto::op_node_check_prime_bytes,
diff --git a/ext/node/ops/buffer.rs b/ext/node/ops/buffer.rs
index 74a011ab8..01f878ec1 100644
--- a/ext/node/ops/buffer.rs
+++ b/ext/node/ops/buffer.rs
@@ -1,5 +1,7 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
+use deno_core::anyhow::anyhow;
+use deno_core::anyhow::Result;
use deno_core::op2;
#[op2(fast)]
@@ -11,3 +13,107 @@ pub fn op_is_ascii(#[buffer] buf: &[u8]) -> bool {
pub fn op_is_utf8(#[buffer] buf: &[u8]) -> bool {
std::str::from_utf8(buf).is_ok()
}
+
+/// Transcodes `source` from `from_encoding` to `to_encoding`.
+///
+/// The encoding names are matched literally against the supported pairs
+/// listed in the `match` below.
+///
+/// # Errors
+///
+/// Returns an "Unable to transcode Buffer" error naming the rejected pair
+/// when the combination is not supported, and forwards any error produced by
+/// the UTF-8 <-> UTF-16LE helpers.
+#[op2]
+#[buffer]
+pub fn op_transcode(
+  #[buffer] source: &[u8],
+  #[string] from_encoding: &str,
+  #[string] to_encoding: &str,
+) -> Result<Vec<u8>> {
+  let transcoded = match (from_encoding, to_encoding) {
+    ("utf8", "ascii") => utf8_to_ascii(source),
+    ("utf8", "latin1") => utf8_to_latin1(source),
+    ("utf8", "utf16le") => utf8_to_utf16le(source)?,
+    ("utf16le", "utf8") => utf16le_to_utf8(source)?,
+    // Both single-byte encodings widen to UTF-16LE the same way.
+    ("latin1" | "ascii", "utf16le") => latin1_ascii_to_utf16le(source),
+    (from, to) => {
+      return Err(anyhow!("Unable to transcode Buffer {from}->{to}"));
+    }
+  };
+  Ok(transcoded)
+}
+
+/// Widens every input byte into a little-endian 16-bit code unit: the byte
+/// itself followed by a zero high byte.
+fn latin1_ascii_to_utf16le(source: &[u8]) -> Vec<u8> {
+  source.iter().flat_map(|&low| [low, 0]).collect()
+}
+
+/// Decodes little-endian UTF-16 bytes and re-encodes them as UTF-8.
+///
+/// The input is read as consecutive 2-byte little-endian code units. A
+/// trailing odd byte cannot form a code unit and is ignored instead of being
+/// indexed past the end of its chunk, which would panic.
+///
+/// # Errors
+///
+/// Returns an "Invalid UTF-16 sequence" error when `String::from_utf16`
+/// rejects the decoded code units.
+fn utf16le_to_utf8(source: &[u8]) -> Result<Vec<u8>> {
+  // `chunks_exact` only yields complete 2-byte chunks, so `pair[1]` is always
+  // in bounds even for odd-length input.
+  let code_units: Vec<u16> = source
+    .chunks_exact(2)
+    .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
+    .collect();
+  String::from_utf16(&code_units)
+    .map(String::into_bytes)
+    .map_err(|e| anyhow!("Invalid UTF-16 sequence: {}", e))
+}
+
+/// Re-encodes UTF-8 bytes as little-endian UTF-16.
+///
+/// # Errors
+///
+/// Propagates the `std::str::from_utf8` error when `source` is not valid
+/// UTF-8.
+fn utf8_to_utf16le(source: &[u8]) -> Result<Vec<u8>> {
+  let text = std::str::from_utf8(source)?;
+  let mut encoded = Vec::new();
+  for unit in text.encode_utf16() {
+    encoded.extend_from_slice(&unit.to_le_bytes());
+  }
+  Ok(encoded)
+}
+
+/// Converts UTF-8 bytes to Latin-1, one output byte per decoded character.
+///
+/// ASCII bytes are copied through. A well-formed 2-byte sequence is decoded
+/// and emitted as-is when its code point is at most 0xFF, otherwise as `?`.
+/// Anything else (longer sequences, malformed or truncated input) yields a
+/// single `?`, after which any directly following continuation bytes are
+/// skipped.
+fn utf8_to_latin1(source: &[u8]) -> Vec<u8> {
+  let mut latin1_bytes = Vec::with_capacity(source.len());
+  let mut i = 0;
+  while i < source.len() {
+    match (source[i], source.get(i + 1)) {
+      (byte, _) if byte <= 0x7F => {
+        // ASCII character
+        latin1_bytes.push(byte);
+        i += 1;
+      }
+      // 2-byte UTF-8 sequence. The second byte must really be a continuation
+      // byte (0b10xx_xxxx); otherwise the lead byte is malformed and the
+      // following byte has to be processed on its own, not swallowed.
+      (lead @ 0xC2..=0xDF, Some(&cont)) if (cont & 0xC0) == 0x80 => {
+        let codepoint =
+          ((u16::from(lead) & 0x1F) << 6) | (u16::from(cont) & 0x3F);
+        latin1_bytes.push(if codepoint <= 0xFF {
+          codepoint as u8
+        } else {
+          b'?'
+        });
+        i += 2;
+      }
+      _ => {
+        // 3-byte or 4-byte UTF-8 sequence, or invalid UTF-8
+        latin1_bytes.push(b'?');
+        // Skip to the next byte that is not a continuation byte
+        i += 1;
+        while i < source.len() && (source[i] & 0xC0) == 0x80 {
+          i += 1;
+        }
+      }
+    }
+  }
+  latin1_bytes
+}
+
+/// Converts UTF-8 bytes to ASCII.
+///
+/// Bytes up to 0x7F are copied unchanged. Every other byte is replaced by a
+/// single `?`, and the continuation bytes (`0b10xx_xxxx`) that directly
+/// follow it are dropped so a multi-byte sequence produces one `?`.
+fn utf8_to_ascii(source: &[u8]) -> Vec<u8> {
+  let mut out = Vec::with_capacity(source.len());
+  let mut bytes = source.iter().copied().peekable();
+  while let Some(current) = bytes.next() {
+    if current <= 0x7F {
+      out.push(current);
+      continue;
+    }
+    out.push(b'?');
+    // Consume the continuation bytes belonging to this sequence.
+    while bytes.next_if(|&next| (next & 0xC0) == 0x80).is_some() {}
+  }
+  out
+}
diff --git a/ext/node/polyfills/buffer.ts b/ext/node/polyfills/buffer.ts
index 8986cf53d..efe3b07a9 100644
--- a/ext/node/polyfills/buffer.ts
+++ b/ext/node/polyfills/buffer.ts
@@ -13,4 +13,5 @@ export {
kMaxLength,
kStringMaxLength,
SlowBuffer,
+ transcode,
} from "ext:deno_node/internal/buffer.mjs";
diff --git a/ext/node/polyfills/internal/buffer.mjs b/ext/node/polyfills/internal/buffer.mjs
index 6e43a4903..6687f7394 100644
--- a/ext/node/polyfills/internal/buffer.mjs
+++ b/ext/node/polyfills/internal/buffer.mjs
@@ -6,7 +6,7 @@
// deno-lint-ignore-file prefer-primordials
import { core } from "ext:core/mod.js";
-import { op_is_ascii, op_is_utf8 } from "ext:core/ops";
+import { op_is_ascii, op_is_utf8, op_transcode } from "ext:core/ops";
import { TextDecoder, TextEncoder } from "ext:deno_web/08_text_encoding.js";
import { codes } from "ext:deno_node/internal/error_codes.ts";
@@ -32,7 +32,11 @@ import {
import { normalizeEncoding } from "ext:deno_node/internal/util.mjs";
import { validateBuffer } from "ext:deno_node/internal/validators.mjs";
import { isUint8Array } from "ext:deno_node/internal/util/types.ts";
-import { ERR_INVALID_STATE, NodeError } from "ext:deno_node/internal/errors.ts";
+import {
+ ERR_INVALID_STATE,
+ genericNodeError,
+ NodeError,
+} from "ext:deno_node/internal/errors.ts";
import {
forgivingBase64Encode,
forgivingBase64UrlEncode,
@@ -2598,6 +2602,48 @@ export function isAscii(input) {
], input);
}
+/**
+ * Re-encodes `source` from one character encoding to another.
+ *
+ * Throws ERR_INVALID_ARG_TYPE when `source` is not a Uint8Array, and a
+ * U_ILLEGAL_ARGUMENT_ERROR error when either encoding fails to normalize or
+ * the native op reports that it cannot transcode the pair. An empty source
+ * yields an empty Buffer before the encodings are looked at.
+ */
+export function transcode(source, fromEnco, toEnco) {
+  if (!isUint8Array(source)) {
+    throw new codes.ERR_INVALID_ARG_TYPE(
+      "source",
+      ["Buffer", "Uint8Array"],
+      source,
+    );
+  }
+  if (source.length === 0) {
+    return Buffer.alloc(0);
+  }
+
+  const code = "U_ILLEGAL_ARGUMENT_ERROR";
+  const illegalArgumentError = genericNodeError(
+    `Unable to transcode Buffer [${code}]`,
+    { code: code, errno: 1 },
+  );
+
+  const from = normalizeEncoding(fromEnco);
+  const to = normalizeEncoding(toEnco);
+  if (!from || !to) {
+    throw illegalArgumentError;
+  }
+
+  // These pairs need no byte-level conversion, so a copy of the source is
+  // returned directly.
+  const isPassThrough = from === to ||
+    (from === "ascii" && (to === "utf8" || to === "latin1"));
+  if (isPassThrough) {
+    return Buffer.from(source);
+  }
+
+  try {
+    const transcoded = op_transcode(new Uint8Array(source), from, to);
+    return Buffer.from(transcoded, to);
+  } catch (err) {
+    // Only the op's "unsupported pair" failure is mapped to the ICU-style
+    // error; everything else is rethrown untouched.
+    if (!err.message.includes("Unable to transcode Buffer")) {
+      throw err;
+    }
+    throw illegalArgumentError;
+  }
+}
+
+
export default {
atob,
btoa,
@@ -2610,4 +2656,5 @@ export default {
kMaxLength,
kStringMaxLength,
SlowBuffer,
+ transcode,
};