diff options
Diffstat (limited to 'cli/js/text_encoding.ts')
-rw-r--r-- | cli/js/text_encoding.ts | 461 |
1 files changed, 0 insertions, 461 deletions
diff --git a/cli/js/text_encoding.ts b/cli/js/text_encoding.ts deleted file mode 100644 index 0709e7123..000000000 --- a/cli/js/text_encoding.ts +++ /dev/null @@ -1,461 +0,0 @@ -// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. -// The following code is based off of text-encoding at: -// https://github.com/inexorabletash/text-encoding -// -// Anyone is free to copy, modify, publish, use, compile, sell, or -// distribute this software, either in source code form or as a compiled -// binary, for any purpose, commercial or non-commercial, and by any -// means. -// -// In jurisdictions that recognize copyright laws, the author or authors -// of this software dedicate any and all copyright interest in the -// software to the public domain. We make this dedication for the benefit -// of the public at large and to the detriment of our heirs and -// successors. We intend this dedication to be an overt act of -// relinquishment in perpetuity of all present and future rights to this -// software under copyright law. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -import * as base64 from "./base64.ts"; -import { decodeUtf8 } from "./decode_utf8.ts"; -import * as domTypes from "./dom_types.ts"; -import { encodeUtf8 } from "./encode_utf8.ts"; - -const CONTINUE = null; -const END_OF_STREAM = -1; -const FINISHED = -1; - -function decoderError(fatal: boolean): number | never { - if (fatal) { - throw new TypeError("Decoder error."); - } - return 0xfffd; // default code point -} - -function inRange(a: number, min: number, max: number): boolean { - return min <= a && a <= max; -} - -function isASCIIByte(a: number): boolean { - return inRange(a, 0x00, 0x7f); -} - -function stringToCodePoints(input: string): number[] { - const u: number[] = []; - for (const c of input) { - u.push(c.codePointAt(0)!); - } - return u; -} - -class UTF8Encoder implements Encoder { - handler(codePoint: number): number | number[] { - if (codePoint === END_OF_STREAM) { - return FINISHED; - } - - if (inRange(codePoint, 0x00, 0x7f)) { - return codePoint; - } - - let count: number; - let offset: number; - if (inRange(codePoint, 0x0080, 0x07ff)) { - count = 1; - offset = 0xc0; - } else if (inRange(codePoint, 0x0800, 0xffff)) { - count = 2; - offset = 0xe0; - } else if (inRange(codePoint, 0x10000, 0x10ffff)) { - count = 3; - offset = 0xf0; - } else { - throw TypeError(`Code point out of range: \\x${codePoint.toString(16)}`); - } - - const bytes = [(codePoint >> (6 * count)) + offset]; - - while (count > 0) { - const temp = codePoint >> (6 * (count - 1)); - bytes.push(0x80 | (temp & 0x3f)); - count--; - } - - return bytes; - } -} - -/** Decodes a string of data which has been encoded using base-64. */ -export function atob(s: string): string { - s = String(s); - s = s.replace(/[\t\n\f\r ]/g, ""); - - if (s.length % 4 === 0) { - s = s.replace(/==?$/, ""); - } - - const rem = s.length % 4; - if (rem === 1 || /[^+/0-9A-Za-z]/.test(s)) { - // TODO: throw `DOMException` - throw new TypeError("The string to be decoded is not correctly encoded"); - } - - // base64-js requires length exactly times of 4 - if (rem > 0) { - s = s.padEnd(s.length + (4 - rem), "="); - } - - const byteArray: Uint8Array = base64.toByteArray(s); - let result = ""; - for (let i = 0; i < byteArray.length; i++) { - result += String.fromCharCode(byteArray[i]); - } - return result; -} - -/** Creates a base-64 ASCII string from the input string. */ -export function btoa(s: string): string { - const byteArray = []; - for (let i = 0; i < s.length; i++) { - const charCode = s[i].charCodeAt(0); - if (charCode > 0xff) { - throw new TypeError( - "The string to be encoded contains characters " + - "outside of the Latin1 range." - ); - } - byteArray.push(charCode); - } - const result = base64.fromByteArray(Uint8Array.from(byteArray)); - return result; -} - -interface DecoderOptions { - fatal?: boolean; - ignoreBOM?: boolean; -} - -interface Decoder { - handler(stream: Stream, byte: number): number | null; -} - -interface Encoder { - handler(codePoint: number): number | number[]; -} - -class SingleByteDecoder implements Decoder { - private _index: number[]; - private _fatal: boolean; - - constructor(index: number[], options: DecoderOptions) { - if (options.ignoreBOM) { - throw new TypeError("Ignoring the BOM is available only with utf-8."); - } - this._fatal = options.fatal || false; - this._index = index; - } - handler(stream: Stream, byte: number): number { - if (byte === END_OF_STREAM) { - return FINISHED; - } - if (isASCIIByte(byte)) { - return byte; - } - const codePoint = this._index[byte - 0x80]; - - if (codePoint == null) { - return decoderError(this._fatal); - } - - return codePoint; - } -} - -// The encodingMap is a hash of labels that are indexed by the conical -// encoding. -const encodingMap: { [key: string]: string[] } = { - "windows-1252": [ - "ansi_x3.4-1968", - "ascii", - "cp1252", - "cp819", - "csisolatin1", - "ibm819", - "iso-8859-1", - "iso-ir-100", - "iso8859-1", - "iso88591", - "iso_8859-1", - "iso_8859-1:1987", - "l1", - "latin1", - "us-ascii", - "windows-1252", - "x-cp1252" - ], - "utf-8": ["unicode-1-1-utf-8", "utf-8", "utf8"] -}; -// We convert these into a Map where every label resolves to its canonical -// encoding type. -const encodings = new Map<string, string>(); -for (const key of Object.keys(encodingMap)) { - const labels = encodingMap[key]; - for (const label of labels) { - encodings.set(label, key); - } -} - -// A map of functions that return new instances of a decoder indexed by the -// encoding type. -const decoders = new Map<string, (options: DecoderOptions) => Decoder>(); - -// Single byte decoders are an array of code point lookups -const encodingIndexes = new Map<string, number[]>(); -// prettier-ignore -encodingIndexes.set("windows-1252", [ - 8364,129,8218,402,8222,8230,8224,8225,710,8240,352,8249,338,141,381,143,144, - 8216,8217,8220,8221,8226,8211,8212,732,8482,353,8250,339,157,382,376,160,161, - 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180, - 181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199, - 200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218, - 219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237, - 238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 -]); -for (const [key, index] of encodingIndexes) { - decoders.set( - key, - (options: DecoderOptions): SingleByteDecoder => { - return new SingleByteDecoder(index, options); - } - ); -} - -function codePointsToString(codePoints: number[]): string { - let s = ""; - for (const cp of codePoints) { - s += String.fromCodePoint(cp); - } - return s; -} - -class Stream { - private _tokens: number[]; - constructor(tokens: number[] | Uint8Array) { - this._tokens = [].slice.call(tokens); - this._tokens.reverse(); - } - - endOfStream(): boolean { - return !this._tokens.length; - } - - read(): number { - return !this._tokens.length ? END_OF_STREAM : this._tokens.pop()!; - } - - prepend(token: number | number[]): void { - if (Array.isArray(token)) { - while (token.length) { - this._tokens.push(token.pop()!); - } - } else { - this._tokens.push(token); - } - } - - push(token: number | number[]): void { - if (Array.isArray(token)) { - while (token.length) { - this._tokens.unshift(token.shift()!); - } - } else { - this._tokens.unshift(token); - } - } -} - -export interface TextDecodeOptions { - stream?: false; -} - -export interface TextDecoderOptions { - fatal?: boolean; - ignoreBOM?: boolean; -} - -type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer; - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function isEitherArrayBuffer(x: any): x is EitherArrayBuffer { - return x instanceof SharedArrayBuffer || x instanceof ArrayBuffer; -} - -export class TextDecoder { - private _encoding: string; - - /** Returns encoding's name, lowercased. */ - get encoding(): string { - return this._encoding; - } - /** Returns `true` if error mode is "fatal", and `false` otherwise. */ - readonly fatal: boolean = false; - /** Returns `true` if ignore BOM flag is set, and `false` otherwise. */ - readonly ignoreBOM: boolean = false; - - constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) { - if (options.ignoreBOM) { - this.ignoreBOM = true; - } - if (options.fatal) { - this.fatal = true; - } - label = String(label) - .trim() - .toLowerCase(); - const encoding = encodings.get(label); - if (!encoding) { - throw new RangeError( - `The encoding label provided ('${label}') is invalid.` - ); - } - if (!decoders.has(encoding) && encoding !== "utf-8") { - throw new TypeError(`Internal decoder ('${encoding}') not found.`); - } - this._encoding = encoding; - } - - /** Returns the result of running encoding's decoder. */ - decode( - input?: domTypes.BufferSource, - options: TextDecodeOptions = { stream: false } - ): string { - if (options.stream) { - throw new TypeError("Stream not supported."); - } - - let bytes: Uint8Array; - if (input instanceof Uint8Array) { - bytes = input; - } else if (isEitherArrayBuffer(input)) { - bytes = new Uint8Array(input); - } else if ( - typeof input === "object" && - "buffer" in input && - isEitherArrayBuffer(input.buffer) - ) { - bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength); - } else { - bytes = new Uint8Array(0); - } - - // For performance reasons we utilise a highly optimised decoder instead of - // the general decoder. - if (this._encoding === "utf-8") { - return decodeUtf8(bytes, this.fatal, this.ignoreBOM); - } - - const decoder = decoders.get(this._encoding)!({ - fatal: this.fatal, - ignoreBOM: this.ignoreBOM - }); - const inputStream = new Stream(bytes); - const output: number[] = []; - - while (true) { - const result = decoder.handler(inputStream, inputStream.read()); - if (result === FINISHED) { - break; - } - - if (result !== CONTINUE) { - output.push(result); - } - } - - if (output.length > 0 && output[0] === 0xfeff) { - output.shift(); - } - - return codePointsToString(output); - } - - get [Symbol.toStringTag](): string { - return "TextDecoder"; - } -} - -interface TextEncoderEncodeIntoResult { - read: number; - written: number; -} - -export class TextEncoder { - /** Returns "utf-8". */ - readonly encoding = "utf-8"; - /** Returns the result of running UTF-8's encoder. */ - encode(input = ""): Uint8Array { - // For performance reasons we utilise a highly optimised decoder instead of - // the general decoder. - if (this.encoding === "utf-8") { - return encodeUtf8(input); - } - - const encoder = new UTF8Encoder(); - const inputStream = new Stream(stringToCodePoints(input)); - const output: number[] = []; - - while (true) { - const result = encoder.handler(inputStream.read()); - if (result === FINISHED) { - break; - } - if (Array.isArray(result)) { - output.push(...result); - } else { - output.push(result); - } - } - - return new Uint8Array(output); - } - encodeInto(input: string, dest: Uint8Array): TextEncoderEncodeIntoResult { - const encoder = new UTF8Encoder(); - const inputStream = new Stream(stringToCodePoints(input)); - - let written = 0; - let read = 0; - while (true) { - const result = encoder.handler(inputStream.read()); - if (result === FINISHED) { - break; - } - read++; - if (Array.isArray(result)) { - dest.set(result, written); - written += result.length; - if (result.length > 3) { - // increment read a second time if greater than U+FFFF - read++; - } - } else { - dest[written] = result; - written++; - } - } - - return { - read, - written - }; - } - get [Symbol.toStringTag](): string { - return "TextEncoder"; - } -} |