summary | refs | log | tree | commit | diff
path: root/js
diff options
context:
space:
mode:
author Tomohito Nakayama <nkym.tmht@gmail.com> 2019-10-02 09:08:51 +0900
committer Ryan Dahl <ry@tinyclouds.org> 2019-10-01 20:08:51 -0400
commit a646c2a88505819e07b5b967b9f8afacbac5aeef (patch)
tree 4ca7f99e29fc59c7efa1d16261579bb6bba6c4c9 /js
parent 75eeac03f31521dff1ef7db9ff2a9cb32a97b111 (diff)
Implement ignoreBOM option of UTF8Decoder in text_encoding (#3040)
Diffstat (limited to 'js')
-rw-r--r-- js/lib.deno_runtime.d.ts 2
-rw-r--r-- js/text_encoding.ts 37
-rw-r--r-- js/text_encoding_test.ts 26
3 files changed, 60 insertions, 5 deletions
diff --git a/js/lib.deno_runtime.d.ts b/js/lib.deno_runtime.d.ts
index 8eb46b410..1e220a29d 100644
--- a/js/lib.deno_runtime.d.ts
+++ b/js/lib.deno_runtime.d.ts
@@ -2372,7 +2372,7 @@ declare namespace textEncoding {
}
export interface TextDecoderOptions {
fatal?: boolean;
- ignoreBOM?: false;
+ ignoreBOM?: boolean;
}
export class TextDecoder {
private _encoding;
diff --git a/js/text_encoding.ts b/js/text_encoding.ts
index a956cd52c..8386ff8b0 100644
--- a/js/text_encoding.ts
+++ b/js/text_encoding.ts
@@ -59,11 +59,13 @@ class UTF8Decoder implements Decoder {
private _bytesSeen = 0;
private _bytesNeeded = 0;
private _fatal: boolean;
+ private _ignoreBOM: boolean;
private _lowerBoundary = 0x80;
private _upperBoundary = 0xbf;
constructor(options: DecoderOptions) {
this._fatal = options.fatal || false;
+ this._ignoreBOM = options.ignoreBOM || false;
}
handler(stream: Stream, byte: number): number | null {
@@ -76,6 +78,26 @@ class UTF8Decoder implements Decoder {
return FINISHED;
}
+ if (this._ignoreBOM) {
+ if (
+ (this._bytesSeen === 0 && byte !== 0xef) ||
+ (this._bytesSeen === 1 && byte !== 0xbb)
+ ) {
+ this._ignoreBOM = false;
+ }
+
+ if (this._bytesSeen === 2) {
+ this._ignoreBOM = false;
+ if (byte === 0xbf) {
+ //Ignore BOM
+ this._codePoint = 0;
+ this._bytesNeeded = 0;
+ this._bytesSeen = 0;
+ return CONTINUE;
+ }
+ }
+ }
+
if (this._bytesNeeded === 0) {
if (isASCIIByte(byte)) {
// Single byte code point
@@ -225,6 +247,7 @@ export function btoa(s: string): string {
interface DecoderOptions {
fatal?: boolean;
+ ignoreBOM?: boolean;
}
interface Decoder {
@@ -240,6 +263,9 @@ class SingleByteDecoder implements Decoder {
private _fatal: boolean;
constructor(index: number[], options: DecoderOptions) {
+ if (options.ignoreBOM) {
+ throw new TypeError("Ignoring the BOM is available only with utf-8.");
+ }
this._fatal = options.fatal || false;
this._index = index;
}
@@ -367,7 +393,7 @@ export interface TextDecodeOptions {
export interface TextDecoderOptions {
fatal?: boolean;
- ignoreBOM?: false;
+ ignoreBOM?: boolean;
}
type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer;
@@ -387,11 +413,11 @@ export class TextDecoder {
/** Returns `true` if error mode is "fatal", and `false` otherwise. */
readonly fatal: boolean = false;
/** Returns `true` if ignore BOM flag is set, and `false` otherwise. */
- readonly ignoreBOM = false;
+ readonly ignoreBOM: boolean = false;
constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) {
if (options.ignoreBOM) {
- throw new TypeError("Ignoring the BOM not supported.");
+ this.ignoreBOM = true;
}
if (options.fatal) {
this.fatal = true;
@@ -435,7 +461,10 @@ export class TextDecoder {
bytes = new Uint8Array(0);
}
- const decoder = decoders.get(this._encoding)!({ fatal: this.fatal });
+ const decoder = decoders.get(this._encoding)!({
+ fatal: this.fatal,
+ ignoreBOM: this.ignoreBOM
+ });
const inputStream = new Stream(bytes);
const output: number[] = [];
diff --git a/js/text_encoding_test.ts b/js/text_encoding_test.ts
index 727424749..aaa9e6b9d 100644
--- a/js/text_encoding_test.ts
+++ b/js/text_encoding_test.ts
@@ -74,6 +74,32 @@ test(function textDecoder2(): void {
assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽");
});
+test(function textDecoderIgnoreBOM(): void {
+ // prettier-ignore
+ const fixture = new Uint8Array([
+ 0xef, 0xbb, 0xbf,
+ 0xf0, 0x9d, 0x93, 0xbd,
+ 0xf0, 0x9d, 0x93, 0xae,
+ 0xf0, 0x9d, 0x94, 0x81,
+ 0xf0, 0x9d, 0x93, 0xbd
+ ]);
+ const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
+ assertEquals(decoder.decode(fixture), "𝓽𝓮𝔁𝓽");
+});
+
+test(function textDecoderNotBOM(): void {
+ // prettier-ignore
+ const fixture = new Uint8Array([
+ 0xef, 0xbb, 0x89,
+ 0xf0, 0x9d, 0x93, 0xbd,
+ 0xf0, 0x9d, 0x93, 0xae,
+ 0xf0, 0x9d, 0x94, 0x81,
+ 0xf0, 0x9d, 0x93, 0xbd
+ ]);
+ const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
+ assertEquals(decoder.decode(fixture), "ﻉ𝓽𝓮𝔁𝓽");
+});
+
test(function textDecoderASCII(): void {
const fixture = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
const decoder = new TextDecoder("ascii");