diff options
Diffstat (limited to 'js/text_encoding.ts')
-rw-r--r-- | js/text_encoding.ts | 344 |
1 files changed, 171 insertions, 173 deletions
diff --git a/js/text_encoding.ts b/js/text_encoding.ts index dd1b22d65..d0e08f73b 100644 --- a/js/text_encoding.ts +++ b/js/text_encoding.ts @@ -27,6 +27,153 @@ import * as base64 from "base64-js"; import * as domTypes from "./dom_types"; import { DenoError, ErrorKind } from "./errors"; +const CONTINUE = null; +const END_OF_STREAM = -1; +const FINISHED = -1; + +function decoderError(fatal: boolean): number | never { + if (fatal) { + throw new TypeError("Decoder error."); + } + return 0xfffd; // default code point +} + +function inRange(a: number, min: number, max: number): boolean { + return min <= a && a <= max; +} + +function isASCIIByte(a: number): boolean { + return inRange(a, 0x00, 0x7f); +} + +function stringToCodePoints(input: string): number[] { + const u: number[] = []; + for (const c of input) { + u.push(c.codePointAt(0)!); + } + return u; +} + +class UTF8Decoder implements Decoder { + private _codePoint = 0; + private _bytesSeen = 0; + private _bytesNeeded = 0; + private _fatal: boolean; + private _lowerBoundary = 0x80; + private _upperBoundary = 0xbf; + + constructor(options: DecoderOptions) { + this._fatal = options.fatal || false; + } + + handler(stream: Stream, byte: number): number | null { + if (byte === END_OF_STREAM && this._bytesNeeded !== 0) { + this._bytesNeeded = 0; + return decoderError(this._fatal); + } + + if (byte === END_OF_STREAM) { + return FINISHED; + } + + if (this._bytesNeeded === 0) { + if (isASCIIByte(byte)) { + // Single byte code point + return byte; + } else if (inRange(byte, 0xc2, 0xdf)) { + // Two byte code point + this._bytesNeeded = 1; + this._codePoint = byte & 0x1f; + } else if (inRange(byte, 0xe0, 0xef)) { + // Three byte code point + if (byte === 0xe0) { + this._lowerBoundary = 0xa0; + } else if (byte === 0xed) { + this._upperBoundary = 0x9f; + } + this._bytesNeeded = 2; + this._codePoint = byte & 0xf; + } else if (inRange(byte, 0xf0, 0xf4)) { + if (byte === 0xf0) { + this._lowerBoundary = 0x90; + } else if (byte === 0xf4) { + this._upperBoundary = 0x8f; + } + this._bytesNeeded = 3; + this._codePoint = byte & 0x7; + } else { + return decoderError(this._fatal); + } + return CONTINUE; + } + + if (!inRange(byte, this._lowerBoundary, this._upperBoundary)) { + // Byte out of range, so encoding error + this._codePoint = 0; + this._bytesNeeded = 0; + this._bytesSeen = 0; + stream.prepend(byte); + return decoderError(this._fatal); + } + + this._lowerBoundary = 0x80; + this._upperBoundary = 0xbf; + + this._codePoint = (this._codePoint << 6) | (byte & 0x3f); + + this._bytesSeen++; + + if (this._bytesSeen !== this._bytesNeeded) { + return CONTINUE; + } + + const codePoint = this._codePoint; + + this._codePoint = 0; + this._bytesNeeded = 0; + this._bytesSeen = 0; + + return codePoint; + } +} + +class UTF8Encoder implements Encoder { + handler(codePoint: number): number | number[] { + if (codePoint === END_OF_STREAM) { + return FINISHED; + } + + if (inRange(codePoint, 0x00, 0x7f)) { + return codePoint; + } + + let count: number; + let offset: number; + if (inRange(codePoint, 0x0080, 0x07ff)) { + count = 1; + offset = 0xc0; + } else if (inRange(codePoint, 0x0800, 0xffff)) { + count = 2; + offset = 0xe0; + } else if (inRange(codePoint, 0x10000, 0x10ffff)) { + count = 3; + offset = 0xf0; + } else { + throw TypeError(`Code point out of range: \\x${codePoint.toString(16)}`); + } + + const bytes = [(codePoint >> (6 * count)) + offset]; + + while (count > 0) { + const temp = codePoint >> (6 * (count - 1)); + bytes.push(0x80 | (temp & 0x3f)); + count--; + } + + return bytes; + } +} + /** Decodes a string of data which has been encoded using base-64. */ export function atob(s: string): string { const rem = s.length % 4; @@ -80,9 +227,30 @@ interface Encoder { handler(codePoint: number): number | number[]; } -const CONTINUE = null; -const END_OF_STREAM = -1; -const FINISHED = -1; +class SingleByteDecoder implements Decoder { + private _index: number[]; + private _fatal: boolean; + + constructor(index: number[], options: DecoderOptions) { + this._fatal = options.fatal || false; + this._index = index; + } + handler(stream: Stream, byte: number): number { + if (byte === END_OF_STREAM) { + return FINISHED; + } + if (isASCIIByte(byte)) { + return byte; + } + const codePoint = this._index[byte - 0x80]; + + if (codePoint == null) { + return decoderError(this._fatal); + } + + return codePoint; + } +} // The encodingMap is a hash of labels that are indexed by the conical // encoding. @@ -127,10 +295,8 @@ decoders.set("utf-8", (options: DecoderOptions) => { // Single byte decoders are an array of code point lookups const encodingIndexes = new Map<string, number[]>(); -// tslint:disable:max-line-length // prettier-ignore encodingIndexes.set("windows-1252", [8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,381,143,144,8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,382,376,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255]); -// tslint:enable for (const [key, index] of encodingIndexes) { decoders.set(key, (options: DecoderOptions) => { return new SingleByteDecoder(index, options); @@ -145,29 +311,6 @@ function codePointsToString(codePoints: number[]): string { return s; } -function decoderError(fatal: boolean): number | never { - if (fatal) { - throw new TypeError("Decoder error."); - } - return 0xfffd; // default code point -} - -function inRange(a: number, min: number, max: number) { - return min <= a && a <= max; -} - -function isASCIIByte(a: number) { - return inRange(a, 0x00, 0x7f); -} - -function stringToCodePoints(input: string): number[] { - const u: number[] = []; - for (const c of input) { - u.push(c.codePointAt(0)!); - } - return u; -} - class Stream { private _tokens: number[]; constructor(tokens: number[] | Uint8Array) { @@ -204,151 +347,6 @@ class Stream { } } -class SingleByteDecoder implements Decoder { - private _index: number[]; - private _fatal: boolean; - - constructor(index: number[], options: DecoderOptions) { - this._fatal = options.fatal || false; - this._index = index; - } - handler(stream: Stream, byte: number): number { - if (byte === END_OF_STREAM) { - return FINISHED; - } - if (isASCIIByte(byte)) { - return byte; - } - const codePoint = this._index[byte - 0x80]; - - if (codePoint == null) { - return decoderError(this._fatal); - } - - return codePoint; - } -} - -class UTF8Decoder implements Decoder { - private _codePoint = 0; - private _bytesSeen = 0; - private _bytesNeeded = 0; - private _fatal: boolean; - private _lowerBoundary = 0x80; - private _upperBoundary = 0xbf; - - constructor(options: DecoderOptions) { - this._fatal = options.fatal || false; - } - - handler(stream: Stream, byte: number): number | null { - if (byte === END_OF_STREAM && this._bytesNeeded !== 0) { - this._bytesNeeded = 0; - return decoderError(this._fatal); - } - - if (byte === END_OF_STREAM) { - return FINISHED; - } - - if (this._bytesNeeded === 0) { - if (isASCIIByte(byte)) { - // Single byte code point - return byte; - } else if (inRange(byte, 0xc2, 0xdf)) { - // Two byte code point - this._bytesNeeded = 1; - this._codePoint = byte & 0x1f; - } else if (inRange(byte, 0xe0, 0xef)) { - // Three byte code point - if (byte === 0xe0) { - this._lowerBoundary = 0xa0; - } else if (byte === 0xed) { - this._upperBoundary = 0x9f; - } - this._bytesNeeded = 2; - this._codePoint = byte & 0xf; - } else if (inRange(byte, 0xf0, 0xf4)) { - if (byte === 0xf0) { - this._lowerBoundary = 0x90; - } else if (byte === 0xf4) { - this._upperBoundary = 0x8f; - } - this._bytesNeeded = 3; - this._codePoint = byte & 0x7; - } else { - return decoderError(this._fatal); - } - return CONTINUE; - } - - if (!inRange(byte, this._lowerBoundary, this._upperBoundary)) { - // Byte out of range, so encoding error - this._codePoint = 0; - this._bytesNeeded = 0; - this._bytesSeen = 0; - stream.prepend(byte); - return decoderError(this._fatal); - } - - this._lowerBoundary = 0x80; - this._upperBoundary = 0xbf; - - this._codePoint = (this._codePoint << 6) | (byte & 0x3f); - - this._bytesSeen++; - - if (this._bytesSeen !== this._bytesNeeded) { - return CONTINUE; - } - - const codePoint = this._codePoint; - - this._codePoint = 0; - this._bytesNeeded = 0; - this._bytesSeen = 0; - - return codePoint; - } -} - -class UTF8Encoder implements Encoder { - handler(codePoint: number): number | number[] { - if (codePoint === END_OF_STREAM) { - return FINISHED; - } - - if (inRange(codePoint, 0x00, 0x7f)) { - return codePoint; - } - - let count: number; - let offset: number; - if (inRange(codePoint, 0x0080, 0x07ff)) { - count = 1; - offset = 0xc0; - } else if (inRange(codePoint, 0x0800, 0xffff)) { - count = 2; - offset = 0xe0; - } else if (inRange(codePoint, 0x10000, 0x10ffff)) { - count = 3; - offset = 0xf0; - } else { - throw TypeError(`Code point out of range: \\x${codePoint.toString(16)}`); - } - - const bytes = [(codePoint >> (6 * count)) + offset]; - - while (count > 0) { - const temp = codePoint >> (6 * (count - 1)); - bytes.push(0x80 | (temp & 0x3f)); - count--; - } - - return bytes; - } -} - export interface TextDecodeOptions { stream?: false; } |