summaryrefslogtreecommitdiff
path: root/js/text_encoding.ts
diff options
context:
space:
mode:
Diffstat (limited to 'js/text_encoding.ts')
-rw-r--r--js/text_encoding.ts344
1 files changed, 171 insertions, 173 deletions
diff --git a/js/text_encoding.ts b/js/text_encoding.ts
index dd1b22d65..d0e08f73b 100644
--- a/js/text_encoding.ts
+++ b/js/text_encoding.ts
@@ -27,6 +27,153 @@ import * as base64 from "base64-js";
import * as domTypes from "./dom_types";
import { DenoError, ErrorKind } from "./errors";
+// Returned by UTF8Decoder.handler when a byte was consumed but no code point
+// is complete yet.
+const CONTINUE = null;
+// Sentinel that handlers compare their input against to detect end of input.
+const END_OF_STREAM = -1;
+// Returned by handlers in response to END_OF_STREAM when nothing is pending.
+// NOTE: has the same numeric value (-1) as END_OF_STREAM.
+const FINISHED = -1;
+
+/**
+ * Reports a decoding failure.
+ *
+ * @param fatal When true the failure is raised as an exception.
+ * @returns U+FFFD (the replacement character) when `fatal` is false.
+ * @throws TypeError with message "Decoder error." when `fatal` is true.
+ */
+function decoderError(fatal: boolean): number | never {
+  if (!fatal) {
+    // Non-fatal mode: substitute the replacement character.
+    return 0xfffd;
+  }
+  throw new TypeError("Decoder error.");
+}
+
+/** Returns true when `a` lies in the closed interval [`min`, `max`]. */
+function inRange(a: number, min: number, max: number): boolean {
+  const atLeastMin = a >= min;
+  return atLeastMin && a <= max;
+}
+
+/** Returns true when `a` is within the ASCII range 0x00..0x7f inclusive. */
+function isASCIIByte(a: number): boolean {
+  return 0x00 <= a && a <= 0x7f;
+}
+
+/**
+ * Splits a string into its Unicode code points.
+ *
+ * Iteration is by code point, so a surrogate pair produces one entry; an
+ * unpaired surrogate is returned as its own value.
+ */
+function stringToCodePoints(input: string): number[] {
+  // Array.from walks the string iterator, which yields whole code points.
+  return Array.from(input, (ch): number => ch.codePointAt(0)!);
+}
+
+/**
+ * Stateful UTF-8 decoder that is fed one byte at a time through `handler`.
+ *
+ * Progress on a multi-byte sequence is kept in the private fields between
+ * calls. Malformed input is reported through `decoderError`, which throws
+ * when `fatal` is set and otherwise yields U+FFFD.
+ */
+class UTF8Decoder implements Decoder {
+  private _codePoint = 0;
+  private _bytesSeen = 0;
+  private _bytesNeeded = 0;
+  private _fatal: boolean;
+  // Inclusive range the next continuation byte must fall in.
+  private _lowerBoundary = 0x80;
+  private _upperBoundary = 0xbf;
+
+  constructor(options: DecoderOptions) {
+    this._fatal = options.fatal || false;
+  }
+
+  /**
+   * Consumes one byte (or END_OF_STREAM).
+   *
+   * @param stream Source stream; a byte that breaks a multi-byte sequence is
+   *   handed back to it via `stream.prepend`.
+   * @param byte The next byte, or END_OF_STREAM.
+   * @returns A decoded code point, CONTINUE when more bytes are required,
+   *   FINISHED at a clean end of stream, or U+FFFD for malformed input in
+   *   non-fatal mode.
+   * @throws TypeError for malformed input when the decoder is fatal.
+   */
+  handler(stream: Stream, byte: number): number | null {
+    if (byte === END_OF_STREAM) {
+      if (this._bytesNeeded !== 0) {
+        // Input ended in the middle of a sequence. Clear everything so no
+        // stale counters or boundaries survive.
+        this._resetState();
+        return decoderError(this._fatal);
+      }
+      return FINISHED;
+    }
+
+    if (this._bytesNeeded === 0) {
+      if (isASCIIByte(byte)) {
+        // Single byte code point
+        return byte;
+      } else if (inRange(byte, 0xc2, 0xdf)) {
+        // Two byte code point
+        this._bytesNeeded = 1;
+        this._codePoint = byte & 0x1f;
+      } else if (inRange(byte, 0xe0, 0xef)) {
+        // Three byte code point
+        if (byte === 0xe0) {
+          // Second byte below 0xa0 would be an overlong encoding.
+          this._lowerBoundary = 0xa0;
+        } else if (byte === 0xed) {
+          // Second byte above 0x9f would encode a surrogate.
+          this._upperBoundary = 0x9f;
+        }
+        this._bytesNeeded = 2;
+        this._codePoint = byte & 0xf;
+      } else if (inRange(byte, 0xf0, 0xf4)) {
+        // Four byte code point
+        if (byte === 0xf0) {
+          // Second byte below 0x90 would be an overlong encoding.
+          this._lowerBoundary = 0x90;
+        } else if (byte === 0xf4) {
+          // Second byte above 0x8f would exceed U+10FFFF.
+          this._upperBoundary = 0x8f;
+        }
+        this._bytesNeeded = 3;
+        this._codePoint = byte & 0x7;
+      } else {
+        // Not a valid lead byte.
+        return decoderError(this._fatal);
+      }
+      return CONTINUE;
+    }
+
+    if (!inRange(byte, this._lowerBoundary, this._upperBoundary)) {
+      // Byte out of range, so encoding error. The boundaries must be reset
+      // here too, otherwise a range narrowed by the failed lead byte would
+      // wrongly reject the continuation bytes of the next sequence.
+      this._resetState();
+      stream.prepend(byte);
+      return decoderError(this._fatal);
+    }
+
+    // Only the first continuation byte has a narrowed range.
+    this._lowerBoundary = 0x80;
+    this._upperBoundary = 0xbf;
+
+    this._codePoint = (this._codePoint << 6) | (byte & 0x3f);
+
+    this._bytesSeen++;
+
+    if (this._bytesSeen !== this._bytesNeeded) {
+      return CONTINUE;
+    }
+
+    const codePoint = this._codePoint;
+
+    this._resetState();
+
+    return codePoint;
+  }
+
+  /** Returns the decoder to its initial, between-sequences state. */
+  private _resetState(): void {
+    this._codePoint = 0;
+    this._bytesNeeded = 0;
+    this._bytesSeen = 0;
+    this._lowerBoundary = 0x80;
+    this._upperBoundary = 0xbf;
+  }
+}
+
+/** Encodes code points into UTF-8 bytes, one code point per `handler` call. */
+class UTF8Encoder implements Encoder {
+  /**
+   * @param codePoint The code point to encode, or END_OF_STREAM.
+   * @returns FINISHED for END_OF_STREAM, the code point itself when it is in
+   *   0x00..0x7f, otherwise an array holding the lead byte followed by the
+   *   continuation bytes.
+   * @throws TypeError when the value is outside 0x00..0x10ffff.
+   */
+  handler(codePoint: number): number | number[] {
+    if (codePoint === END_OF_STREAM) {
+      return FINISHED;
+    }
+
+    if (inRange(codePoint, 0x00, 0x7f)) {
+      return codePoint;
+    }
+
+    // Number of continuation bytes and the marker bits of the lead byte.
+    let trailing: number;
+    let leadMarker: number;
+    if (inRange(codePoint, 0x0080, 0x07ff)) {
+      trailing = 1;
+      leadMarker = 0xc0;
+    } else if (inRange(codePoint, 0x0800, 0xffff)) {
+      trailing = 2;
+      leadMarker = 0xe0;
+    } else if (inRange(codePoint, 0x10000, 0x10ffff)) {
+      trailing = 3;
+      leadMarker = 0xf0;
+    } else {
+      throw new TypeError(`Code point out of range: \\x${codePoint.toString(16)}`);
+    }
+
+    const bytes = [(codePoint >> (6 * trailing)) + leadMarker];
+
+    // Emit the remaining bits six at a time, most significant group first.
+    for (let shift = 6 * (trailing - 1); shift >= 0; shift -= 6) {
+      bytes.push(0x80 | ((codePoint >> shift) & 0x3f));
+    }
+
+    return bytes;
+  }
+}
+
/** Decodes a string of data which has been encoded using base-64. */
export function atob(s: string): string {
const rem = s.length % 4;
@@ -80,9 +227,30 @@ interface Encoder {
handler(codePoint: number): number | number[];
}
-const CONTINUE = null;
-const END_OF_STREAM = -1;
-const FINISHED = -1;
+/**
+ * Decoder for single-byte encodings backed by a lookup table.
+ *
+ * Bytes in the ASCII range map to themselves; any other byte is looked up in
+ * `index` at position `byte - 0x80`.
+ */
+class SingleByteDecoder implements Decoder {
+  private _index: number[];
+  private _fatal: boolean;
+
+  constructor(index: number[], options: DecoderOptions) {
+    this._fatal = options.fatal || false;
+    this._index = index;
+  }
+
+  /**
+   * @param stream Unused by this decoder.
+   * @param byte The next byte, or END_OF_STREAM.
+   * @returns FINISHED at end of stream, otherwise the decoded code point, or
+   *   U+FFFD when the table has no entry and the decoder is not fatal.
+   * @throws TypeError when the table has no entry and the decoder is fatal.
+   */
+  handler(stream: Stream, byte: number): number {
+    if (byte === END_OF_STREAM) {
+      return FINISHED;
+    }
+    if (isASCIIByte(byte)) {
+      return byte;
+    }
+
+    const mapped = this._index[byte - 0x80];
+    // `== null` also catches holes / out-of-bounds lookups (undefined).
+    return mapped == null ? decoderError(this._fatal) : mapped;
+  }
+}
// The encodingMap is a hash of labels that are indexed by the canonical
// encoding.
@@ -127,10 +295,8 @@ decoders.set("utf-8", (options: DecoderOptions) => {
// Single byte decoders are an array of code point lookups
const encodingIndexes = new Map<string, number[]>();
-// tslint:disable:max-line-length
// prettier-ignore
encodingIndexes.set("windows-1252", [8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,381,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,382,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255]);
-// tslint:enable
for (const [key, index] of encodingIndexes) {
decoders.set(key, (options: DecoderOptions) => {
return new SingleByteDecoder(index, options);
@@ -145,29 +311,6 @@ function codePointsToString(codePoints: number[]): string {
return s;
}
-function decoderError(fatal: boolean): number | never {
- if (fatal) {
- throw new TypeError("Decoder error.");
- }
- return 0xfffd; // default code point
-}
-
-function inRange(a: number, min: number, max: number) {
- return min <= a && a <= max;
-}
-
-function isASCIIByte(a: number) {
- return inRange(a, 0x00, 0x7f);
-}
-
-function stringToCodePoints(input: string): number[] {
- const u: number[] = [];
- for (const c of input) {
- u.push(c.codePointAt(0)!);
- }
- return u;
-}
-
class Stream {
private _tokens: number[];
constructor(tokens: number[] | Uint8Array) {
@@ -204,151 +347,6 @@ class Stream {
}
}
-class SingleByteDecoder implements Decoder {
- private _index: number[];
- private _fatal: boolean;
-
- constructor(index: number[], options: DecoderOptions) {
- this._fatal = options.fatal || false;
- this._index = index;
- }
- handler(stream: Stream, byte: number): number {
- if (byte === END_OF_STREAM) {
- return FINISHED;
- }
- if (isASCIIByte(byte)) {
- return byte;
- }
- const codePoint = this._index[byte - 0x80];
-
- if (codePoint == null) {
- return decoderError(this._fatal);
- }
-
- return codePoint;
- }
-}
-
-class UTF8Decoder implements Decoder {
- private _codePoint = 0;
- private _bytesSeen = 0;
- private _bytesNeeded = 0;
- private _fatal: boolean;
- private _lowerBoundary = 0x80;
- private _upperBoundary = 0xbf;
-
- constructor(options: DecoderOptions) {
- this._fatal = options.fatal || false;
- }
-
- handler(stream: Stream, byte: number): number | null {
- if (byte === END_OF_STREAM && this._bytesNeeded !== 0) {
- this._bytesNeeded = 0;
- return decoderError(this._fatal);
- }
-
- if (byte === END_OF_STREAM) {
- return FINISHED;
- }
-
- if (this._bytesNeeded === 0) {
- if (isASCIIByte(byte)) {
- // Single byte code point
- return byte;
- } else if (inRange(byte, 0xc2, 0xdf)) {
- // Two byte code point
- this._bytesNeeded = 1;
- this._codePoint = byte & 0x1f;
- } else if (inRange(byte, 0xe0, 0xef)) {
- // Three byte code point
- if (byte === 0xe0) {
- this._lowerBoundary = 0xa0;
- } else if (byte === 0xed) {
- this._upperBoundary = 0x9f;
- }
- this._bytesNeeded = 2;
- this._codePoint = byte & 0xf;
- } else if (inRange(byte, 0xf0, 0xf4)) {
- if (byte === 0xf0) {
- this._lowerBoundary = 0x90;
- } else if (byte === 0xf4) {
- this._upperBoundary = 0x8f;
- }
- this._bytesNeeded = 3;
- this._codePoint = byte & 0x7;
- } else {
- return decoderError(this._fatal);
- }
- return CONTINUE;
- }
-
- if (!inRange(byte, this._lowerBoundary, this._upperBoundary)) {
- // Byte out of range, so encoding error
- this._codePoint = 0;
- this._bytesNeeded = 0;
- this._bytesSeen = 0;
- stream.prepend(byte);
- return decoderError(this._fatal);
- }
-
- this._lowerBoundary = 0x80;
- this._upperBoundary = 0xbf;
-
- this._codePoint = (this._codePoint << 6) | (byte & 0x3f);
-
- this._bytesSeen++;
-
- if (this._bytesSeen !== this._bytesNeeded) {
- return CONTINUE;
- }
-
- const codePoint = this._codePoint;
-
- this._codePoint = 0;
- this._bytesNeeded = 0;
- this._bytesSeen = 0;
-
- return codePoint;
- }
-}
-
-class UTF8Encoder implements Encoder {
- handler(codePoint: number): number | number[] {
- if (codePoint === END_OF_STREAM) {
- return FINISHED;
- }
-
- if (inRange(codePoint, 0x00, 0x7f)) {
- return codePoint;
- }
-
- let count: number;
- let offset: number;
- if (inRange(codePoint, 0x0080, 0x07ff)) {
- count = 1;
- offset = 0xc0;
- } else if (inRange(codePoint, 0x0800, 0xffff)) {
- count = 2;
- offset = 0xe0;
- } else if (inRange(codePoint, 0x10000, 0x10ffff)) {
- count = 3;
- offset = 0xf0;
- } else {
- throw TypeError(`Code point out of range: \\x${codePoint.toString(16)}`);
- }
-
- const bytes = [(codePoint >> (6 * count)) + offset];
-
- while (count > 0) {
- const temp = codePoint >> (6 * (count - 1));
- bytes.push(0x80 | (temp & 0x3f));
- count--;
- }
-
- return bytes;
- }
-}
-
/**
 * Options bag for decoding.
 *
 * `stream` is optional and, when given, is typed to accept only `false`.
 * NOTE(review): presumably this is the options argument of
 * `TextDecoder.decode` and streaming is not supported — the consumer is
 * outside this excerpt, confirm there.
 */
export interface TextDecodeOptions {
stream?: false;
}