diff options
Diffstat (limited to 'cli/js/web/text_encoding.ts')
-rw-r--r-- | cli/js/web/text_encoding.ts | 581 |
1 files changed, 0 insertions, 581 deletions
diff --git a/cli/js/web/text_encoding.ts b/cli/js/web/text_encoding.ts deleted file mode 100644 index 97848cb77..000000000 --- a/cli/js/web/text_encoding.ts +++ /dev/null @@ -1,581 +0,0 @@ -// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license. - -// The following code is based off of text-encoding at: -// https://github.com/inexorabletash/text-encoding -// -// Anyone is free to copy, modify, publish, use, compile, sell, or -// distribute this software, either in source code form or as a compiled -// binary, for any purpose, commercial or non-commercial, and by any -// means. -// -// In jurisdictions that recognize copyright laws, the author or authors -// of this software dedicate any and all copyright interest in the -// software to the public domain. We make this dedication for the benefit -// of the public at large and to the detriment of our heirs and -// successors. We intend this dedication to be an overt act of -// relinquishment in perpetuity of all present and future rights to this -// software under copyright law. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -import { DOMExceptionImpl as DOMException } from "./dom_exception.ts"; -import * as base64 from "./base64.ts"; -import { decodeUtf8 } from "./decode_utf8.ts"; -import { core } from "../core.ts"; - -const CONTINUE = null; -const END_OF_STREAM = -1; -const FINISHED = -1; - -function decoderError(fatal: boolean): number | never { - if (fatal) { - throw new TypeError("Decoder error."); - } - return 0xfffd; // default code point -} - -function inRange(a: number, min: number, max: number): boolean { - return min <= a && a <= max; -} - -function isASCIIByte(a: number): boolean { - return inRange(a, 0x00, 0x7f); -} - -function stringToCodePoints(input: string): number[] { - const u: number[] = []; - for (const c of input) { - u.push(c.codePointAt(0)!); - } - return u; -} - -class UTF8Encoder implements Encoder { - handler(codePoint: number): "finished" | number[] { - if (codePoint === END_OF_STREAM) { - return "finished"; - } - - if (inRange(codePoint, 0x00, 0x7f)) { - return [codePoint]; - } - - let count: number; - let offset: number; - if (inRange(codePoint, 0x0080, 0x07ff)) { - count = 1; - offset = 0xc0; - } else if (inRange(codePoint, 0x0800, 0xffff)) { - count = 2; - offset = 0xe0; - } else if (inRange(codePoint, 0x10000, 0x10ffff)) { - count = 3; - offset = 0xf0; - } else { - throw TypeError(`Code point out of range: \\x${codePoint.toString(16)}`); - } - - const bytes = [(codePoint >> (6 * count)) + offset]; - - while (count > 0) { - const temp = codePoint >> (6 * (count - 1)); - bytes.push(0x80 | (temp & 0x3f)); - count--; - } - - return bytes; - } -} - -export function atob(s: string): string { - s = String(s); - s = s.replace(/[\t\n\f\r ]/g, ""); - - if (s.length % 4 === 0) { - s = s.replace(/==?$/, ""); - } - - const rem = s.length % 4; - if (rem === 1 || /[^+/0-9A-Za-z]/.test(s)) { - throw new DOMException( - "The string to be decoded is not correctly encoded", - "DataDecodeError", - ); - } - - // base64-js requires length exactly times of 4 - if (rem > 0) { - s = s.padEnd(s.length + (4 - rem), "="); - } - - const byteArray: Uint8Array = base64.toByteArray(s); - let result = ""; - for (let i = 0; i < byteArray.length; i++) { - result += String.fromCharCode(byteArray[i]); - } - return result; -} - -export function btoa(s: string): string { - const byteArray = []; - for (let i = 0; i < s.length; i++) { - const charCode = s[i].charCodeAt(0); - if (charCode > 0xff) { - throw new TypeError( - "The string to be encoded contains characters " + - "outside of the Latin1 range.", - ); - } - byteArray.push(charCode); - } - const result = base64.fromByteArray(Uint8Array.from(byteArray)); - return result; -} - -interface DecoderOptions { - fatal?: boolean; - ignoreBOM?: boolean; -} - -interface Decoder { - handler(stream: Stream, byte: number): number | null; -} - -interface Encoder { - handler(codePoint: number): "finished" | number[]; -} - -class SingleByteDecoder implements Decoder { - readonly #index: number[]; - readonly #fatal: boolean; - - constructor( - index: number[], - { ignoreBOM = false, fatal = false }: DecoderOptions = {}, - ) { - if (ignoreBOM) { - throw new TypeError("Ignoring the BOM is available only with utf-8."); - } - this.#fatal = fatal; - this.#index = index; - } - handler(_stream: Stream, byte: number): number { - if (byte === END_OF_STREAM) { - return FINISHED; - } - if (isASCIIByte(byte)) { - return byte; - } - const codePoint = this.#index[byte - 0x80]; - - if (codePoint == null) { - return decoderError(this.#fatal); - } - - return codePoint; - } -} - -// The encodingMap is a hash of labels that are indexed by the conical -// encoding. -const encodingMap: { [key: string]: string[] } = { - "windows-1252": [ - "ansi_x3.4-1968", - "ascii", - "cp1252", - "cp819", - "csisolatin1", - "ibm819", - "iso-8859-1", - "iso-ir-100", - "iso8859-1", - "iso88591", - "iso_8859-1", - "iso_8859-1:1987", - "l1", - "latin1", - "us-ascii", - "windows-1252", - "x-cp1252", - ], - "utf-8": ["unicode-1-1-utf-8", "utf-8", "utf8"], -}; -// We convert these into a Map where every label resolves to its canonical -// encoding type. -const encodings = new Map<string, string>(); -for (const key of Object.keys(encodingMap)) { - const labels = encodingMap[key]; - for (const label of labels) { - encodings.set(label, key); - } -} - -// A map of functions that return new instances of a decoder indexed by the -// encoding type. -const decoders = new Map<string, (options: DecoderOptions) => Decoder>(); - -// Single byte decoders are an array of code point lookups -const encodingIndexes = new Map<string, number[]>(); -// deno-fmt-ignore -encodingIndexes.set("windows-1252", [ - 8364, - 129, - 8218, - 402, - 8222, - 8230, - 8224, - 8225, - 710, - 8240, - 352, - 8249, - 338, - 141, - 381, - 143, - 144, - 8216, - 8217, - 8220, - 8221, - 8226, - 8211, - 8212, - 732, - 8482, - 353, - 8250, - 339, - 157, - 382, - 376, - 160, - 161, - 162, - 163, - 164, - 165, - 166, - 167, - 168, - 169, - 170, - 171, - 172, - 173, - 174, - 175, - 176, - 177, - 178, - 179, - 180, - 181, - 182, - 183, - 184, - 185, - 186, - 187, - 188, - 189, - 190, - 191, - 192, - 193, - 194, - 195, - 196, - 197, - 198, - 199, - 200, - 201, - 202, - 203, - 204, - 205, - 206, - 207, - 208, - 209, - 210, - 211, - 212, - 213, - 214, - 215, - 216, - 217, - 218, - 219, - 220, - 221, - 222, - 223, - 224, - 225, - 226, - 227, - 228, - 229, - 230, - 231, - 232, - 233, - 234, - 235, - 236, - 237, - 238, - 239, - 240, - 241, - 242, - 243, - 244, - 245, - 246, - 247, - 248, - 249, - 250, - 251, - 252, - 253, - 254, - 255, -]); -for (const [key, index] of encodingIndexes) { - decoders.set( - key, - (options: DecoderOptions): SingleByteDecoder => { - return new SingleByteDecoder(index, options); - }, - ); -} - -function codePointsToString(codePoints: number[]): string { - let s = ""; - for (const cp of codePoints) { - s += String.fromCodePoint(cp); - } - return s; -} - -class Stream { - #tokens: number[]; - constructor(tokens: number[] | Uint8Array) { - this.#tokens = [...tokens]; - this.#tokens.reverse(); - } - - endOfStream(): boolean { - return !this.#tokens.length; - } - - read(): number { - return !this.#tokens.length ? END_OF_STREAM : this.#tokens.pop()!; - } - - prepend(token: number | number[]): void { - if (Array.isArray(token)) { - while (token.length) { - this.#tokens.push(token.pop()!); - } - } else { - this.#tokens.push(token); - } - } - - push(token: number | number[]): void { - if (Array.isArray(token)) { - while (token.length) { - this.#tokens.unshift(token.shift()!); - } - } else { - this.#tokens.unshift(token); - } - } -} - -export interface TextDecodeOptions { - stream?: false; -} - -export interface TextDecoderOptions { - fatal?: boolean; - ignoreBOM?: boolean; -} - -type EitherArrayBuffer = SharedArrayBuffer | ArrayBuffer; - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function isEitherArrayBuffer(x: any): x is EitherArrayBuffer { - return x instanceof SharedArrayBuffer || x instanceof ArrayBuffer; -} - -export class TextDecoder { - readonly #encoding: string; - - get encoding(): string { - return this.#encoding; - } - readonly fatal: boolean = false; - readonly ignoreBOM: boolean = false; - - constructor(label = "utf-8", options: TextDecoderOptions = { fatal: false }) { - if (options.ignoreBOM) { - this.ignoreBOM = true; - } - if (options.fatal) { - this.fatal = true; - } - label = String(label).trim().toLowerCase(); - const encoding = encodings.get(label); - if (!encoding) { - throw new RangeError( - `The encoding label provided ('${label}') is invalid.`, - ); - } - if (!decoders.has(encoding) && encoding !== "utf-8") { - throw new TypeError(`Internal decoder ('${encoding}') not found.`); - } - this.#encoding = encoding; - } - - decode( - input?: BufferSource, - options: TextDecodeOptions = { stream: false }, - ): string { - if (options.stream) { - throw new TypeError("Stream not supported."); - } - - let bytes: Uint8Array; - if (input instanceof Uint8Array) { - bytes = input; - } else if (isEitherArrayBuffer(input)) { - bytes = new Uint8Array(input); - } else if ( - typeof input === "object" && - "buffer" in input && - isEitherArrayBuffer(input.buffer) - ) { - bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength); - } else { - bytes = new Uint8Array(0); - } - - // For simple utf-8 decoding "Deno.core.decode" can be used for performance - if ( - this.#encoding === "utf-8" && - this.fatal === false && - this.ignoreBOM === false - ) { - return core.decode(bytes); - } - - // For performance reasons we utilise a highly optimised decoder instead of - // the general decoder. - if (this.#encoding === "utf-8") { - return decodeUtf8(bytes, this.fatal, this.ignoreBOM); - } - - const decoder = decoders.get(this.#encoding)!({ - fatal: this.fatal, - ignoreBOM: this.ignoreBOM, - }); - const inputStream = new Stream(bytes); - const output: number[] = []; - - while (true) { - const result = decoder.handler(inputStream, inputStream.read()); - if (result === FINISHED) { - break; - } - - if (result !== CONTINUE) { - output.push(result); - } - } - - if (output.length > 0 && output[0] === 0xfeff) { - output.shift(); - } - - return codePointsToString(output); - } - - get [Symbol.toStringTag](): string { - return "TextDecoder"; - } -} - -interface TextEncoderEncodeIntoResult { - read: number; - written: number; -} - -export class TextEncoder { - readonly encoding = "utf-8"; - encode(input = ""): Uint8Array { - // Deno.core.encode() provides very efficient utf-8 encoding - if (this.encoding === "utf-8") { - return core.encode(input); - } - - const encoder = new UTF8Encoder(); - const inputStream = new Stream(stringToCodePoints(input)); - const output: number[] = []; - - while (true) { - const result = encoder.handler(inputStream.read()); - if (result === "finished") { - break; - } - output.push(...result); - } - - return new Uint8Array(output); - } - encodeInto(input: string, dest: Uint8Array): TextEncoderEncodeIntoResult { - const encoder = new UTF8Encoder(); - const inputStream = new Stream(stringToCodePoints(input)); - - let written = 0; - let read = 0; - while (true) { - const result = encoder.handler(inputStream.read()); - if (result === "finished") { - break; - } - if (dest.length - written >= result.length) { - read++; - dest.set(result, written); - written += result.length; - if (result.length > 3) { - // increment read a second time if greater than U+FFFF - read++; - } - } else { - break; - } - } - - return { - read, - written, - }; - } - get [Symbol.toStringTag](): string { - return "TextEncoder"; - } -} |